Skip to content

Commit

Permalink
Added proper handling of tab characters (fixes #44)
Browse files Browse the repository at this point in the history
  • Loading branch information
fangly committed May 5, 2017
1 parent 65b312f commit adebf47
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 4 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# lintr 1.0.0.9001 #
* Added proper handling of tab characters (fixes #44, @fangly)
* Fix line number sometimes wrongly reported by no_tab_linter() (#134, @fangly)
* Fix line and column number sometimes wrongly reported by spaces_inside_linter()
(#203, @fangly)
Expand Down
67 changes: 66 additions & 1 deletion R/get_source_expressions.R
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ get_source_file <- function(source_file, error = identity) {
assign("e", e, envir = parent.frame())
}

fix_eq_assign(adjust_columns(getParseData(source_file)))
fix_eq_assign(fix_tab_indentation(adjust_columns(getParseData(source_file)), source_file$lines))
}

find_line_fun <- function(content) {
Expand Down Expand Up @@ -206,6 +206,71 @@ adjust_columns <- function(content) {
content
}

# Restore column numbers without tab indentation
#
# parse() and thus getParseData() count 1 tab as a variable number of spaces (see src/main/gram.c).
# The number of spaces is just so that the code is brought to the next 8-character indentation level
# e.g.:
# "1\t;" -> "1 ;"
# "12\t;" -> "12 ;"
# "123\t;" -> "123 ;"
# "1234\t;" -> "1234 ;"
# "12345\t;" -> "12345 ;"
# "123456\t;" -> "123456 ;"
# "1234567\t;" -> "1234567 ;"
# "12345678\t;" -> "12345678 ;"
# "123456789\t;" -> "123456789 ;"
# "1234567890\t;" -> "1234567890 ;"
# Fix the column numbers so that each tab counts as a single character, not a tab indentation.
fix_tab_indentation <- function(pc, lines) {
tab_cols <- re_matches(lines, "\t", global = TRUE, locations = TRUE)
tab_cols <- lapply(
tab_cols,
function(cols) {
start_cols <- cols[["start"]]
if (!is.na(start_cols[[1L]])) {
start_cols
} else {
NA
}
}
)
names(tab_cols) <- seq_along(tab_cols)
tab_cols <- tab_cols[!is.na(tab_cols)]

for (line in names(tab_cols)) {
tab_widths <- tab_widths(tab_cols[[line]])
which_lines <- pc[["line1"]] == as.integer(line)
cols <- pc[which_lines, c("col1", "col2")]
if (nrow(cols)) {
for (tab_col in tab_cols[[line]]) {
which_cols <- cols > tab_col
cols[which_cols] <- cols[which_cols] - tab_widths[[as.character(tab_col)]] + 1L
pc[which_lines, c("col1", "col2")] <- cols
}
}
}

pc
}


tab_widths <- function(tab_columns, indent_width = 8L) {
nms <- as.character(tab_columns)
widths <- vapply(
seq_along(tab_columns),
function(i) {
tab_col <- tab_columns[[i]]
tab_width <- indent_width - (tab_col - 1L) %% indent_width
which_cols <- tab_columns > tab_col
tab_columns[which_cols] <<- tab_columns[which_cols] + tab_width - 1L
tab_width
},
integer(1L)
)
names(widths) <- nms
widths
}

# This function wraps equal assign expressions in a parent expression so they
# are the same as the corresponding <- expression
Expand Down
3 changes: 0 additions & 3 deletions R/no_tab_linter.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ no_tab_linter <- function(source_file) {
type = "style",
message = "Use spaces to indent, not tabs.",
line = source_file$lines[[as.character(line_number)]],

# R outputs tabs with 8 spaces
# TODO: this is incorrect for embedded tabs, I am not going to fix it.
ranges = list(c(start, end)),
linter = "no_tab_linter"
)
Expand Down
42 changes: 42 additions & 0 deletions tests/testthat/test-get_source_expressions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
context("get_source_expression")

test_that("tab positions have been corrected", {
f <- tempfile()

writeLines("1\n\t", f)
expect_error(lintr:::get_source_expressions(f), NA, label="empty parsed_content line")

writeLines("TRUE", f)
pc <- lintr:::get_source_expressions(f)[["expressions"]][[1L]][["parsed_content"]]
expect_equivalent(pc[pc[["text"]] == "TRUE", c("col1", "col2")], c(1L, 4L))

writeLines("\tTRUE", f)
pc <- lintr:::get_source_expressions(f)[["expressions"]][[1L]][["parsed_content"]]
expect_equivalent(pc[pc[["text"]] == "TRUE", c("col1", "col2")], c(2L, 5L))

writeLines("\t\tTRUE", f)
pc <- lintr:::get_source_expressions(f)[["expressions"]][[1L]][["parsed_content"]]
expect_equivalent(pc[pc[["text"]] == "TRUE", c("col1", "col2")], c(3L, 6L))

writeLines("n\t<=\tTRUE", f)
pc <- lintr:::get_source_expressions(f)[["expressions"]][[1L]][["parsed_content"]]
expect_equivalent(pc[pc[["text"]] == "n", c("col1", "col2")], c(1L, 1L))
expect_equivalent(pc[pc[["text"]] == "<=", c("col1", "col2")], c(3L, 4L))
expect_equivalent(pc[pc[["text"]] == "TRUE", c("col1", "col2")], c(6L, 9L))

writeLines("\tfunction\t(x)\t{\tprint(pc[\t,1])\t;\t}", f)
pc <- lintr:::get_source_expressions(f)[["expressions"]][[1L]][["parsed_content"]]
expect_equivalent(pc[pc[["text"]] == "function", c("col1", "col2")], c(2L, 9L))
expect_equivalent(pc[pc[["text"]] == "x", c("col1", "col2")], c(12L, 12L))
expect_equivalent(pc[pc[["text"]] == "print", c("col1", "col2")], c(17L, 21L))
expect_equivalent(pc[pc[["text"]] == ";", c("col1", "col2")], c(32L, 32L))
expect_equivalent(pc[pc[["text"]] == "}", c("col1", "col2")], c(34L, 34L))

writeLines("# test tab\n\ns <- 'I have \\t a dog'\nrep(\ts, \t3)", f)
y <- lintr:::get_source_expressions(f)[["expressions"]]
pc <- y[[2]][["parsed_content"]]
expect_equivalent(pc[pc[["token"]] == "STR_CONST", c("line1", "col1", "col2")], c(3L, 6L, 22L))
pc <- y[[3]][["parsed_content"]]
expect_equivalent(pc[pc[["token"]] == "NUM_CONST", c("line1", "col1", "col2")], c(4L, 10L, 10L))

})

0 comments on commit adebf47

Please sign in to comment.