Skip to content

Commit

Permalink
Tab correction (#237)
Browse files Browse the repository at this point in the history
* Added proper handling of tab characters (fixes #44)

* Test update for tab linter

+ small doc update

* Add link to line of R's source code responsible for tab indentation

* Added a test function to remove redundant code

* don't pass lines to fix_tab_indentations separately

* Function which calculates tab width now ~2x as fast

* Small fixes

* Small rework

* Properly handle multiline expressions

... and halved the time to correct tab indentations

* Further tab indentation correction speed optimizations
  • Loading branch information
fangly authored and jimhester committed May 11, 2017
1 parent 65b312f commit aa70430
Show file tree
Hide file tree
Showing 8 changed files with 146 additions and 17 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# lintr 1.0.0.9001 #
* Added proper handling of tab characters (fixes #44, @fangly)
* Fix line number sometimes wrongly reported by no_tab_linter() (#134, @fangly)
* Fix line and column number sometimes wrongly reported by spaces_inside_linter()
(#203, @fangly)
Expand Down
1 change: 0 additions & 1 deletion R/extract.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ extract_r_source <- function(filename, lines) {
}
},
starts, ends)
output
replace_prefix(output, pattern$chunk.code)
}

Expand Down
74 changes: 69 additions & 5 deletions R/get_source_expressions.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ get_source_file <- function(source_file, error = identity) {
assign("e", e, envir = parent.frame())
}

fix_eq_assign(adjust_columns(getParseData(source_file)))
fix_eq_assigns(fix_column_numbers(fix_tab_indentations(source_file)))
}

find_line_fun <- function(content) {
Expand Down Expand Up @@ -168,9 +168,8 @@ find_column_fun <- function(content) {
}
}

# This is used to adjust the columns that getParseData reports from bytes to
# letters.
adjust_columns <- function(content) {
# Adjust the columns that getParseData reports from bytes to characters.
fix_column_numbers <- function(content) {
if (is.null(content)) {
return(NULL)
}
Expand Down Expand Up @@ -207,9 +206,74 @@ adjust_columns <- function(content) {
}


# Fix column numbers when there are tabs
# getParseData() counts 1 tab as a variable number of spaces instead of one:
# https://github.com/wch/r-source/blame/e7401b68ab0e032fce3e376aaca9a5431619b2b4/src/main/gram.y#L512
# The number of spaces is so that the code is brought to the next 8-character indentation level e.g:
# "1\t;" -> "1 ;"
# "12\t;" -> "12 ;"
# "123\t;" -> "123 ;"
# "1234\t;" -> "1234 ;"
# "12345\t;" -> "12345 ;"
# "123456\t;" -> "123456 ;"
# "1234567\t;" -> "1234567 ;"
# "12345678\t;" -> "12345678 ;"
# "123456789\t;" -> "123456789 ;"
# "1234567890\t;" -> "1234567890 ;"
fix_tab_indentations <- function(source_file) {
pc <- getParseData(source_file)

if (is.null(pc)) {
return(NULL)
}

tab_cols <- gregexpr("\t", source_file[["lines"]], fixed = TRUE)
names(tab_cols) <- seq_along(tab_cols)
tab_cols <- tab_cols[!is.na(tab_cols)] # source lines from .Rmd and other files are NA
tab_cols <- lapply(tab_cols, function(x) {if (x[[1L]] < 0L) {NA} else {x}})
tab_cols <- tab_cols[!is.na(tab_cols)]

if (!length(tab_cols)) {
return(pc)
}

pc_cols <- c("line1", "line2", "col1", "col2")
dat <- matrix(data = unlist(pc[, pc_cols], use.names = FALSE), ncol = 2)
lines <- as.integer(names(tab_cols))
for (i in seq_along(tab_cols)) {
is_curr_line <- dat[, 1L] == lines[[i]]
if (any(is_curr_line)) {
line_tab_offsets <- tab_offsets(tab_cols[[i]])
for (j in seq_along(tab_cols[[i]])) {
is_line_to_change <- is_curr_line & dat[, 2L] > tab_cols[[i]][[j]]
if (any(is_line_to_change)) {
dat[is_line_to_change, 2L] <- dat[is_line_to_change, 2L] - line_tab_offsets[[j]]
}
}
}
}
pc[, pc_cols] <- dat
pc
}


tab_offsets <- function(tab_columns) {
cum_offset <- 0L
vapply(
tab_columns - 1L,
function(tab_idx) {
offset <- 7L - (tab_idx + cum_offset) %% 8L # using a tab width of 8 characters
cum_offset <<- cum_offset + offset
offset
},
integer(1L),
USE.NAMES = FALSE
)
}

# This function wraps equal assign expressions in a parent expression so they
# are the same as the corresponding <- expression
fix_eq_assign <- function(pc) {
fix_eq_assigns <- function(pc) {
if (is.null(pc)) {
return(NULL)
}
Expand Down
4 changes: 2 additions & 2 deletions R/lint.R
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,11 @@ pkg_name <- function(path = find_package()) {
#' Create a \code{Lint} object
#' @param filename path to the source file that was linted.
#' @param line_number line number where the lint occurred.
#' @param column_number column the lint occurred.
#' @param column_number column number where the lint occurred.
#' @param type type of lint.
#' @param message message used to describe the lint error
#' @param line code source where the lint occured
#' @param ranges ranges on the line that should be emphasized.
#' @param ranges a list of ranges on the line that should be emphasized.
#' @param linter name of linter that created the Lint object.
#' @export
Lint <- function(filename, line_number = 1L, column_number = 1L,
Expand Down
3 changes: 0 additions & 3 deletions R/no_tab_linter.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ no_tab_linter <- function(source_file) {
type = "style",
message = "Use spaces to indent, not tabs.",
line = source_file$lines[[as.character(line_number)]],

# R outputs tabs with 8 spaces
# TODO: this is incorrect for embedded tabs, I am not going to fix it.
ranges = list(c(start, end)),
linter = "no_tab_linter"
)
Expand Down
4 changes: 2 additions & 2 deletions man/Lint.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

62 changes: 62 additions & 0 deletions tests/testthat/test-get_source_expressions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
context("get_source_expression")


with_content_to_parse <- function(content, code) {
f <- tempfile()
on.exit(unlink(f))
writeLines(content, f)
pc <- lapply(get_source_expressions(f)[["expressions"]], `[[`, "parsed_content")
eval(substitute(code))
}


test_that("tab positions have been corrected", {
with_content_to_parse("1\n\t",
expect_length(pc, 2L)
)

with_content_to_parse("TRUE",
expect_equivalent(pc[[1L]][pc[[1L]][["text"]] == "TRUE", c("col1", "col2")], c(1L, 4L))
)

with_content_to_parse("\tTRUE",
expect_equivalent(pc[[1L]][pc[[1L]][["text"]] == "TRUE", c("col1", "col2")], c(2L, 5L))
)

with_content_to_parse("\t\tTRUE",
expect_equivalent(pc[[1]][pc[[1L]][["text"]] == "TRUE", c("col1", "col2")], c(3L, 6L))
)

with_content_to_parse("x\t<-\tTRUE", {
expect_equivalent(pc[[1L]][pc[[1L]][["text"]] == "x", c("col1", "col2")], c(1L, 1L))
expect_equivalent(pc[[1L]][pc[[1L]][["text"]] == "<-", c("col1", "col2")], c(3L, 4L))
expect_equivalent(pc[[1L]][pc[[1L]][["text"]] == "TRUE", c("col1", "col2")], c(6L, 9L))
})

with_content_to_parse("\tfunction\t(x)\t{\tprint(pc[\t,1])\t;\t}", {
expect_equivalent(pc[[1L]][pc[[1L]][["text"]] == "function", c("col1", "col2")], c(2L, 9L))
expect_equivalent(pc[[1L]][pc[[1L]][["text"]] == "x", c("col1", "col2")], c(12L, 12L))
expect_equivalent(pc[[1L]][pc[[1L]][["text"]] == "print", c("col1", "col2")], c(17L, 21L))
expect_equivalent(pc[[1L]][pc[[1L]][["text"]] == ";", c("col1", "col2")], c(32L, 32L))
expect_equivalent(pc[[1L]][pc[[1L]][["text"]] == "}", c("col1", "col2")], c(34L, 34L))
})

with_content_to_parse("# test tab\n\ns <- 'I have \\t a dog'\nrep(\ts, \t3)", {
expect_equivalent(
pc[[2L]][pc[[2L]][["text"]] == "'I have \\t a dog'", c("line1", "col1", "col2")],
c(3L, 6L, 22L)
)
expect_equivalent(
pc[[3L]][pc[[3L]][["text"]] == "3", c("line1", "col1", "col2")],
c(4L, 10L, 10L)
)
})

with_content_to_parse("function(){\nTRUE\n\t}", {
expect_equivalent(
pc[[1L]][1L, c("line1", "col1", "line2", "col2")],
c(1L, 1L, 3L, 2L),
info = "expression that spans several lines"
)
})
})
14 changes: 10 additions & 4 deletions tests/testthat/test-no_tab_linter.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,15 @@ test_that("returns the correct linting", {

expect_lint("#\tblah", NULL, no_tab_linter)

expect_lint("\tblah", c(message = msg, line_number = 1L), no_tab_linter)
expect_lint(
"\tblah",
list(c(message = msg, line_number = 1L, column_number = 1L)),
no_tab_linter
)

expect_lint("\n\t\tblah", c(message = msg, line_number = 2L), no_tab_linter)

# Note: no tests of column number since they are currently incorrect (tabs converted to 8 spaces)
expect_lint(
"\n\t\t\tblah",
list(c(message = msg, line_number = 2L, column_number = 1L)),
no_tab_linter
)
})

0 comments on commit aa70430

Please sign in to comment.