From adebf4723f9f786d23233048894e28985535cff3 Mon Sep 17 00:00:00 2001 From: Florent Angly Date: Fri, 5 May 2017 09:18:23 +0200 Subject: [PATCH] Added proper handling of tab characters (fixes #44) --- NEWS.md | 1 + R/get_source_expressions.R | 67 +++++++++++++++++++- R/no_tab_linter.R | 3 - tests/testthat/test-get_source_expressions.R | 42 ++++++++++++ 4 files changed, 109 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/test-get_source_expressions.R diff --git a/NEWS.md b/NEWS.md index d87df3414..78f038cc7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,5 @@ # lintr 1.0.0.9001 # +* Added proper handling of tab characters (fixes #44, @fangly) * Fix line number sometimes wrongly reported by no_tab_linter() (#134, @fangly) * Fix line and column number sometimes wrongly reported by spaces_inside_linter() (#203, @fangly) diff --git a/R/get_source_expressions.R b/R/get_source_expressions.R index 1f09e9a8b..49441a53c 100644 --- a/R/get_source_expressions.R +++ b/R/get_source_expressions.R @@ -132,7 +132,7 @@ get_source_file <- function(source_file, error = identity) { assign("e", e, envir = parent.frame()) } - fix_eq_assign(adjust_columns(getParseData(source_file))) + fix_eq_assign(fix_tab_indentation(adjust_columns(getParseData(source_file)), source_file$lines)) } find_line_fun <- function(content) { @@ -206,6 +206,71 @@ adjust_columns <- function(content) { content } +# Restore column numbers without tab indentation +# +# parse() and thus getParseData() count 1 tab as a variable number of spaces (see src/main/gram.c). +# The number of spaces is just so that the code is brought to the next 8-character indentation level +# e.g.: +# "1\t;" -> "1 ;" +# "12\t;" -> "12 ;" +# "123\t;" -> "123 ;" +# "1234\t;" -> "1234 ;" +# "12345\t;" -> "12345 ;" +# "123456\t;" -> "123456 ;" +# "1234567\t;" -> "1234567 ;" +# "12345678\t;" -> "12345678 ;" +# "123456789\t;" -> "123456789 ;" +# "1234567890\t;" -> "1234567890 ;" +# Fix the column numbers so that each tab counts as a single character, not a tab indentation. +fix_tab_indentation <- function(pc, lines) { + tab_cols <- re_matches(lines, "\t", global = TRUE, locations = TRUE) + tab_cols <- lapply( + tab_cols, + function(cols) { + start_cols <- cols[["start"]] + if (!is.na(start_cols[[1L]])) { + start_cols + } else { + NA + } + } + ) + names(tab_cols) <- seq_along(tab_cols) + tab_cols <- tab_cols[!is.na(tab_cols)] + + for (line in names(tab_cols)) { + tab_widths <- tab_widths(tab_cols[[line]]) + which_lines <- pc[["line1"]] == as.integer(line) + cols <- pc[which_lines, c("col1", "col2")] + if (nrow(cols)) { + for (tab_col in tab_cols[[line]]) { + which_cols <- cols > tab_col + cols[which_cols] <- cols[which_cols] - tab_widths[[as.character(tab_col)]] + 1L + pc[which_lines, c("col1", "col2")] <- cols + } + } + } + + pc +} + + +tab_widths <- function(tab_columns, indent_width = 8L) { + nms <- as.character(tab_columns) + widths <- vapply( + seq_along(tab_columns), + function(i) { + tab_col <- tab_columns[[i]] + tab_width <- indent_width - (tab_col - 1L) %% indent_width + which_cols <- tab_columns > tab_col + tab_columns[which_cols] <<- tab_columns[which_cols] + tab_width - 1L + tab_width + }, + integer(1L) + ) + names(widths) <- nms + widths +} # This function wraps equal assign expressions in a parent expression so they # are the same as the corresponding <- expression diff --git a/R/no_tab_linter.R b/R/no_tab_linter.R index bd2a2a183..46ad73315 100644 --- a/R/no_tab_linter.R +++ b/R/no_tab_linter.R @@ -26,9 +26,6 @@ no_tab_linter <- function(source_file) { type = "style", message = "Use spaces to indent, not tabs.", line = source_file$lines[[as.character(line_number)]], - - # R outputs tabs with 8 spaces - # TODO: this is incorrect for embedded tabs, I am not going to fix it. ranges = list(c(start, end)), linter = "no_tab_linter" ) diff --git a/tests/testthat/test-get_source_expressions.R b/tests/testthat/test-get_source_expressions.R new file mode 100644 index 000000000..9394f13e2 --- /dev/null +++ b/tests/testthat/test-get_source_expressions.R @@ -0,0 +1,42 @@ +context("get_source_expression") + +test_that("tab positions have been corrected", { + f <- tempfile() + + writeLines("1\n\t", f) + expect_error(lintr:::get_source_expressions(f), NA, label="empty parsed_content line") + + writeLines("TRUE", f) + pc <- lintr:::get_source_expressions(f)[["expressions"]][[1L]][["parsed_content"]] + expect_equivalent(pc[pc[["text"]] == "TRUE", c("col1", "col2")], c(1L, 4L)) + + writeLines("\tTRUE", f) + pc <- lintr:::get_source_expressions(f)[["expressions"]][[1L]][["parsed_content"]] + expect_equivalent(pc[pc[["text"]] == "TRUE", c("col1", "col2")], c(2L, 5L)) + + writeLines("\t\tTRUE", f) + pc <- lintr:::get_source_expressions(f)[["expressions"]][[1L]][["parsed_content"]] + expect_equivalent(pc[pc[["text"]] == "TRUE", c("col1", "col2")], c(3L, 6L)) + + writeLines("n\t<=\tTRUE", f) + pc <- lintr:::get_source_expressions(f)[["expressions"]][[1L]][["parsed_content"]] + expect_equivalent(pc[pc[["text"]] == "n", c("col1", "col2")], c(1L, 1L)) + expect_equivalent(pc[pc[["text"]] == "<=", c("col1", "col2")], c(3L, 4L)) + expect_equivalent(pc[pc[["text"]] == "TRUE", c("col1", "col2")], c(6L, 9L)) + + writeLines("\tfunction\t(x)\t{\tprint(pc[\t,1])\t;\t}", f) + pc <- lintr:::get_source_expressions(f)[["expressions"]][[1L]][["parsed_content"]] + expect_equivalent(pc[pc[["text"]] == "function", c("col1", "col2")], c(2L, 9L)) + expect_equivalent(pc[pc[["text"]] == "x", c("col1", "col2")], c(12L, 12L)) + expect_equivalent(pc[pc[["text"]] == "print", c("col1", "col2")], c(17L, 21L)) + expect_equivalent(pc[pc[["text"]] == ";", c("col1", "col2")], c(32L, 32L)) + expect_equivalent(pc[pc[["text"]] == "}", c("col1", "col2")], c(34L, 34L)) + + writeLines("# test tab\n\ns <- 'I have \\t a dog'\nrep(\ts, \t3)", f) + y <- lintr:::get_source_expressions(f)[["expressions"]] + pc <- y[[2]][["parsed_content"]] + expect_equivalent(pc[pc[["token"]] == "STR_CONST", c("line1", "col1", "col2")], c(3L, 6L, 22L)) + pc <- y[[3]][["parsed_content"]] + expect_equivalent(pc[pc[["token"]] == "NUM_CONST", c("line1", "col1", "col2")], c(4L, 10L, 10L)) + +})