Skip to content

Commit

Permalink
safer Lint(), xml_nodes_to_lints() and lint() (#1788)
Browse files Browse the repository at this point in the history
* safer Lint(), xml_nodes_to_lints() and lint()

* add missing newline

* add more checks to Lint()

fixes #763

* fix surfaced errors in our own code

* fix cyclocomp

* avoid apply(..., simplify = FALSE) (R >= 4.1), try fixing r-devel

* delint

* line should have been line_number

* more line fixups

* reorder column == 0 case

* fix column number problems in r-devel

move column_number to nchar(line) + 1L if it's larger
anchor end-of-line parse error lints to nchar(line) + 1 as well instead of nchar(line)

* feedback

* fix tests

* fix apparent typo

Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
  • Loading branch information
AshesITR and MichaelChirico authored Dec 8, 2022
1 parent 354d4ee commit a936cf6
Show file tree
Hide file tree
Showing 11 changed files with 211 additions and 18 deletions.
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@

* `lint_package()` correctly finds a package from within a subdir if the `path` points to anywhere within the package (#1759, @AshesITR)

* Improved error behavior in `Lint()`, `lint()` and `xml_nodes_to_lints()` (#1427, #763, @AshesITR)
+ `Lint()` validates its inputs more thoroughly, preventing errors during `print.Lints` like "Error in rep.int(character, length) : invalid 'times' value:".
+ `lint()` no longer tries to create an expression tree with unexpected end of input errors, because they can be broken.
+ `xml_nodes_to_lints()` warns if it can't find lint locations and uses dummy locations as a fallback.

* `linters_with_defaults()` no longer erroneously marks linter factories as linters (#1725, @AshesITR).

## Changes to defaults
Expand Down
54 changes: 41 additions & 13 deletions R/get_source_expressions.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ get_source_expressions <- function(filename, lines = NULL) {
source_expression$content <- get_content(source_expression$lines)
parsed_content <- get_source_expression(source_expression, error = function(e) lint_parse_error(e, source_expression))

if (inherits(e, "lint") && (is.na(e$line) || !nzchar(e$line))) {
if (inherits(e, "lint") && (is.na(e$line) || !nzchar(e$line) || e$message == "unexpected end of input")) {
# Don't create expression list if it's unreliable (invalid encoding or unhandled parse error)
expressions <- list()
} else {
Expand Down Expand Up @@ -152,6 +152,30 @@ lint_parse_error <- function(e, source_expression) {
lint_parse_error_nonstandard(e, source_expression)
}

#' Ensure a string is valid for printing
#'
#' Helper to ensure a valid string is provided as line where necessary.
#' Handles two cases:
#'
#' 1. NA lines, as generated by running outside of code blocks in Rmd documents
#' 2. invalid string lines, as generated by invalid encoding settings
#'
#' [nchar()] can detect both cases: 1. returns `NA_integer_`, 2. errors
#'
#' @param line Possibly misencoded or NA line
#'
#' @return `line` if it's a valid non-NA string, otherwise an empty string.
#'
#' @noRd
fixup_line <- function(line) {
nchars <- tryCatch(nchar(line, type = "chars"), error = function(e) NA_integer_)
if (is.na(nchars)) {
""
} else {
line
}
}

#' Convert an R >= 4.3.0 classed parseError with metadata into a lint
#'
#' @param e A parse error of class `parseError` generated by R >= 4.3.0
Expand All @@ -162,7 +186,7 @@ lint_parse_error <- function(e, source_expression) {
#' @noRd
lint_parse_error_r43 <- function(e, source_expression) {
msg <- rex::re_substitutes(e$message, rex::rex(" (", except_some_of(")"), ")", end), "")
line <- e$lineno
line_number <- e$lineno
column <- e$colno
substr(msg, 1L, 1L) <- toupper(substr(msg, 1L, 1L))
msg <- paste0(msg, ".")
Expand All @@ -172,23 +196,27 @@ lint_parse_error_r43 <- function(e, source_expression) {
}

if (column == 0L) {
line <- line - 1L
column <- nchar(source_expression$lines[[line]])
line_number <- line_number - 1L
}

if (line < 1L || line > length(source_expression$lines)) {
if (line_number < 1L || line_number > length(source_expression$lines)) {
# Safely handle invalid location info
line <- 1L
line_number <- 1L
column <- 1L
}

line <- fixup_line(source_expression$lines[[line_number]])
if (column == 0L || column > nchar(line) + 1L) {
column <- nchar(line) + 1L
}

Lint(
filename = source_expression$filename,
line_number = line,
line_number = line_number,
column_number = column,
type = "error",
message = msg,
line = source_expression$lines[[line]]
line = line
)
}

Expand All @@ -208,10 +236,10 @@ lint_parse_error_r42 <- function(message_info, source_expression) {
# end of the previous line
if (column_number %==% 0L) {
line_number <- line_number - 1L
line <- source_expression$lines[[line_number]]
column_number <- nchar(line)
line <- fixup_line(source_expression$lines[[line_number]])
column_number <- nchar(line) + 1L
} else {
line <- source_expression$lines[[line_number]]
line <- fixup_line(source_expression$lines[[line_number]])
}

Lint(
Expand Down Expand Up @@ -249,7 +277,7 @@ lint_parse_error_nonstandard <- function(e, source_expression) {
column_number = 1L,
type = "error",
message = "Invalid multibyte character in parser. Is the encoding correct?",
line = source_expression$lines[[l]]
line = fixup_line(source_expression$lines[[l]])
)
)
} else if (grepl("invalid multibyte string, element", e$message, fixed = TRUE)) {
Expand Down Expand Up @@ -284,7 +312,7 @@ lint_parse_error_nonstandard <- function(e, source_expression) {
column_number = 1L,
type = "error",
message = sprintf("Repeated formal argument '%s'.", sym),
line = source_expression$lines[[l]]
line = fixup_line(source_expression$lines[[l]])
)
)
}
Expand Down
18 changes: 15 additions & 3 deletions R/indentation_linter.R
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ indentation_linter <- function(indent = 2L, hanging_indent_style = c("tidy", "al
change_type <- find_indent_type(change)
change_begin <- as.integer(xml2::xml_attr(change, "line1")) + 1L
change_end <- xml2::xml_find_num(change, xp_block_ends)
if (change_begin <= change_end) {
if (isTRUE(change_begin <= change_end)) {
to_indent <- seq(from = change_begin, to = change_end)
if (change_type == "hanging") {
expected_indent_levels[to_indent] <- as.integer(xml2::xml_attr(change, "col2"))
Expand Down Expand Up @@ -231,7 +231,12 @@ indentation_linter <- function(indent = 2L, hanging_indent_style = c("tidy", "al
lint_lines <- unname(as.integer(names(source_expression$file_lines)[bad_lines]))
lint_ranges <- cbind(
pmin(expected_indent_levels[bad_lines] + 1L, indent_levels[bad_lines]),
pmax(expected_indent_levels[bad_lines], indent_levels[bad_lines])
# If the expected indent is larger than the current line width, the lint range would become invalid.
# Therefor, limit range end to end of line.
pmin(
pmax(expected_indent_levels[bad_lines], indent_levels[bad_lines]),
nchar(source_expression$file_lines[bad_lines]) + 1L
)
)
Map(
Lint,
Expand All @@ -241,7 +246,14 @@ indentation_linter <- function(indent = 2L, hanging_indent_style = c("tidy", "al
type = "style",
message = lint_messages,
line = unname(source_expression$file_lines[bad_lines]),
ranges = apply(lint_ranges, 1L, list, simplify = FALSE)
# TODO(AshesITR) when updating supported R version to R >= 4.1:
# replace by ranges = apply(lint_ranges, 1L, list, simplify = FALSE)
ranges = lapply(
seq_along(bad_lines),
function(i) {
list(lint_ranges[i, ])
}
)
)
} else {
list()
Expand Down
44 changes: 44 additions & 0 deletions R/lint.R
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,21 @@ Lint <- function(filename, line_number = 1L, column_number = 1L, # nolint: objec
)
}

if (length(line) != 1L || !is.character(line)) {
stop("`line` must be a string.")
}
max_col <- max(nchar(line) + 1L, 1L, na.rm = TRUE)
if (!is_number(column_number) || column_number < 0L || column_number > max_col) {
stop(sprintf(
"`column_number` must be an integer between 0 and nchar(line) + 1 (%d). It was %s.",
max_col, column_number
))
}
if (!is_number(line_number) || line_number < 1L) {
stop(sprintf("`line_number` must be a positive integer. It was %s.", line_number))
}
check_ranges(ranges, max_col)

type <- match.arg(type)

obj <- list(
Expand All @@ -418,6 +433,35 @@ Lint <- function(filename, line_number = 1L, column_number = 1L, # nolint: objec
obj
}

is_number <- function(number, n = 1L) {
length(number) == n && is.numeric(number) && !anyNA(number)
}

is_valid_range <- function(range, max_col) {
0L <= range[[1L]] &&
range[[1L]] <= range[[2L]] &&
range[[2L]] <= max_col
}

check_ranges <- function(ranges, max_col) {
if (is.null(ranges)) {
return()
}
if (!is.list(ranges)) {
stop("`ranges` must be NULL or a list.")
}

for (range in ranges) {
if (!is_number(range, 2L)) {
stop("`ranges` must only contain length 2 integer vectors without NAs.")
} else if (!is_valid_range(range, max_col)) {
stop(sprintf(
"All entries in `ranges` must satisfy 0 <= range[1L] <= range[2L] <= nchar(line) + 1 (%d).", max_col
))
}
}
}

rstudio_source_markers <- function(lints) {
if (!requireNamespace("rstudioapi", quietly = TRUE)) {
stop("'rstudioapi' is required for rstudio_source_markers().") # nocov
Expand Down
12 changes: 12 additions & 0 deletions R/xml_nodes_to_lints.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,29 @@ xml_nodes_to_lints <- function(xml, source_expression, lint_message,
type <- match.arg(type, c("style", "warning", "error"))
line1 <- xml2::xml_attr(xml, "line1")
col1 <- xp_find_location(xml, range_start_xpath)
if (is.na(col1)) {
warning("Could not find range start for lint. Defaulting to start of line.")
col1 <- 1L
}

lines <- source_expression[["lines"]]
if (is.null(lines)) lines <- source_expression[["file_lines"]]

if (xml2::xml_attr(xml, "line2") == line1) {
col2 <- xp_find_location(xml, range_end_xpath)
if (is.na(col2)) {
warning("Could not find range end for lint. Defaulting to width 1.")
col2 <- col1
}
} else {
col2 <- nchar(lines[[line1]])
}

column_number <- xp_find_location(xml, column_number_xpath)
if (is.na(column_number)) {
warning("Could not find location for lint. Defaulting to start of range.")
column_number <- col1
}

Lint(
filename = source_expression$filename,
Expand Down
5 changes: 5 additions & 0 deletions tests/testthat/test-indentation_linter.R
Original file line number Diff line number Diff line change
Expand Up @@ -690,3 +690,8 @@ test_that("native pipe is supported", {
linter
)
})

test_that("it doesn't error on invalid code", {
# Part of #1427
expect_lint("function() {)", list(linter = "error", message = rex::rex("unexpected ')'")), indentation_linter())
})
27 changes: 27 additions & 0 deletions tests/testthat/test-lint.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
test_that("Lint() errors on invalid input", {
dummy_line <- "abc"
expect_error(
Lint("dummy.R", line = dummy_line, column_number = NA_integer_),
rex::rex("`column_number` must be an integer between 0 and nchar(line) + 1 (4). It was NA.")
)
expect_error(
Lint("dummy.R", line = dummy_line, line_number = 0L),
rex::rex("`line_number` must be a positive integer. It was 0.")
)
expect_error(
Lint("dummy.R", ranges = c(1L, 3L)),
rex::rex("`ranges` must be NULL or a list.")
)
expect_error(
Lint("dummy.R", ranges = list(1L)),
rex::rex("`ranges` must only contain length 2 integer vectors without NAs.")
)
expect_error(
Lint("dummy.R", ranges = list(c(1L, NA_integer_))),
rex::rex("`ranges` must only contain length 2 integer vectors without NAs.")
)
expect_error(
Lint("dummy.R", line = dummy_line, ranges = list(c(1L, 2L), c(1L, 5L))),
rex::rex("All entries in `ranges` must satisfy 0 <= range[1L] <= range[2L] <= nchar(line) + 1 (4).")
)
})
2 changes: 1 addition & 1 deletion tests/testthat/test-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ test_that("as.data.frame.lints", {
type = "error",
message = "Under no circumstances is the use of foobar allowed.",
line = "a <- 1",
ranges = list(c(1L, 2L), c(10L, 20L))
ranges = list(c(1L, 2L), c(6L, 7L))
),
"lint"
)
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-rstudio_markers.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ test_that("it returns markers which match lints", {
Lint(
filename = "test_file2",
line_number = 10L,
column_number = 5L,
column_number = 1L,
type = "warning",
message = "test a message"
)
Expand Down
12 changes: 12 additions & 0 deletions tests/testthat/test-spaces_left_parentheses_linter.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,15 @@ test_that("doesn't produce a warning", {
)
)
})

test_that("it doesn't produce invalid lints", {
# Part of #1427
expect_warning(
expect_lint(
"function() {)",
list(list(linter = "function_left_parentheses_linter", ranges = list(c(9L, 9L))), list(linter = "error")),
function_left_parentheses_linter()
),
rex::rex("Could not find range end for lint. Defaulting to width 1.")
)
})
48 changes: 48 additions & 0 deletions tests/testthat/test-xml_nodes_to_lints.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,54 @@ test_that("it handles multi-line lints correctly", {
expect_identical(l$ranges, list(as.integer(c(xml2::xml_attr(node, "col1"), nchar(code[1L])))))
})

test_that("it doesn't produce invalid lints", {
# Part of #1427
code <- "before %+% after"
tmpfile <- withr::local_tempfile(lines = code)
expr <- get_source_expressions(tmpfile)$expressions[[2L]]
xml <- expr$full_xml_parsed_content
node <- xml2::xml_find_first(xml, "//SPECIAL")

# We can produce invalid location xpaths by requiring any non-existent node
xp_column_number <- "number(./preceding-sibling::*[1]/@col2 + 1)"
xp_range_start <- "number(./preceding-sibling::*[1]/@col1)"
xp_range_end <- "number(./following-sibling::*[1]/@col2)"
xp_invalid <- "number(./DOES-NOT-EXIST/@col1)"

expect_warning(l_invalid_loc1 <- xml_nodes_to_lints(
xml = node,
source_expression = expr,
lint_message = "lint_msg",
column_number_xpath = xp_column_number,
range_start_xpath = xp_invalid,
range_end_xpath = xp_range_end
), rex::rex("Could not find range start for lint. Defaulting to start of line."))
expect_identical(l_invalid_loc1$column_number, nchar("before") + 1L)
expect_identical(l_invalid_loc1$ranges, list(c(1L, nchar(code))))

expect_warning(l_invalid_loc2 <- xml_nodes_to_lints(
xml = node,
source_expression = expr,
lint_message = "lint_msg",
column_number_xpath = xp_column_number,
range_start_xpath = xp_range_start,
range_end_xpath = xp_invalid
), rex::rex("Could not find range end for lint. Defaulting to width 1."))
expect_identical(l_invalid_loc2$column_number, nchar("before") + 1L)
expect_identical(l_invalid_loc2$ranges, list(c(1L, 1L)))

expect_warning(l_invalid_col <- xml_nodes_to_lints(
xml = node,
source_expression = expr,
lint_message = "lint_msg",
column_number_xpath = xp_invalid,
range_start_xpath = xp_range_start,
range_end_xpath = xp_range_end
), rex::rex("Could not find location for lint. Defaulting to start of range."))
expect_identical(l_invalid_col$column_number, 1L)
expect_identical(l_invalid_col$ranges, list(c(1L, nchar(code))))
})

test_that("erroneous input errors", {
expect_error(xml_nodes_to_lints(1L), "Expected an xml_nodeset", fixed = TRUE)
})

0 comments on commit a936cf6

Please sign in to comment.