From 2260f381839bff5d6ae32c2df204419bed2b7921 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Tue, 16 Mar 2021 18:28:46 +0100
Subject: [PATCH 01/26] add TODO comments to all parts that need changing

---
 .dev/compare_branches.R                      | 2 +-
 R/cache.R                                    | 2 +-
 R/get_source_expressions.R                   | 4 ++--
 tests/testthat/test-cache.R                  | 2 ++
 tests/testthat/test-exclusions.R             | 2 ++
 tests/testthat/test-get_source_expressions.R | 2 ++
 6 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/.dev/compare_branches.R b/.dev/compare_branches.R
index 6124308a8..fa5824334 100755
--- a/.dev/compare_branches.R
+++ b/.dev/compare_branches.R
@@ -146,7 +146,7 @@ test_encoding <- function(dir) {
   tryCatch({
     lapply(
       list.files(dir, pattern = "(?i)\\.r(?:md)?$", recursive = TRUE, full.names = TRUE),
-      function(x) nchar(readLines(x, warn = FALSE))
+      function(x) nchar(readLines(x, warn = FALSE)) # TODO respect encoding
     )
     FALSE
   }, error = function(x) TRUE)
diff --git a/R/cache.R b/R/cache.R
index 7a412be9e..9a517e1d8 100644
--- a/R/cache.R
+++ b/R/cache.R
@@ -119,7 +119,7 @@ digest_content <- function(linters, obj) {
     list(linters, obj$content, is.null(obj$parsed_content))
   } else {
     # assume a filename
-    list(linters, readLines(obj))
+    list(linters, readLines(obj)) # TODO respect encoding
   }
   digest::digest(content, algo = "sha1")
 }
diff --git a/R/get_source_expressions.R b/R/get_source_expressions.R
index 49031f9d3..1ad8d3f5c 100644
--- a/R/get_source_expressions.R
+++ b/R/get_source_expressions.R
@@ -26,7 +26,7 @@
 #'   consisting of 6 elements:
 #'   \itemize{
 #'     \item{`filename` (`character`)}
-#'     \item{`file_lines` (`character`) the [readLines()] output for this file}
+#'     \item{`file_lines` (`character`) the [readLines()] output for this file} # TODO document encoding
 #'     \item{`content` (`character`) for .R files, the same as `file_lines`;
 #'           for .Rmd scripts, this is the extracted R source code (as text)}
 #'     \item{`full_parsed_content` (`data.frame`) as given by
@@ -38,7 +38,7 @@
 #'   }
 #'   }
 #'   \item{error}{A `Lint` object describing any parsing error.}
-#'   \item{lines}{The [readLines()] output for this file.}
+#'   \item{lines}{The [readLines()] output for this file.} # TODO document encoding
 #' @export
 #' @md
 get_source_expressions <- function(filename, lines = NULL) {
diff --git a/tests/testthat/test-cache.R b/tests/testthat/test-cache.R
index 9d4693a01..941e7a8cd 100644
--- a/tests/testthat/test-cache.R
+++ b/tests/testthat/test-cache.R
@@ -480,3 +480,5 @@ test_that("cache = TRUE works with nolint", {
   writeLines("1+1 # nolint\n", file)
   expect_length(lint(file, linters, cache = TRUE), 0)
 })
+
+# TODO test encoding-specific behaviour
diff --git a/tests/testthat/test-exclusions.R b/tests/testthat/test-exclusions.R
index d053dbe1d..f650feca8 100644
--- a/tests/testthat/test-exclusions.R
+++ b/tests/testthat/test-exclusions.R
@@ -26,4 +26,6 @@ test_that("it excludes properly", {
   }
 })
 
+# TODO test encoding-specific behaviour
+
 options(old_ops)
diff --git a/tests/testthat/test-get_source_expressions.R b/tests/testthat/test-get_source_expressions.R
index 59d676318..e0ff739b6 100644
--- a/tests/testthat/test-get_source_expressions.R
+++ b/tests/testthat/test-get_source_expressions.R
@@ -90,3 +90,5 @@ test_that("Multi-byte character truncated by parser is ignored", {
     expect_equal(error$column_number, 8L)
   })
 })
+
+# TODO test encoding-specific behaviour

From 643a918ce36003f4a11915df7aaf2350594200ca Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Tue, 16 Mar 2021 18:59:18 +0100
Subject: [PATCH 02/26] add encoding setting

---
 R/lint.R     | 17 +++++++++++++++++
 R/settings.R | 43 +++++++++++++++++++++++++++++++++++++++++++
 R/zzz.R      |  1 +
 3 files changed, 61 insertions(+)

diff --git a/R/lint.R b/R/lint.R
index 59d0e294d..513ace6eb 100644
--- a/R/lint.R
+++ b/R/lint.R
@@ -376,6 +376,23 @@ find_package <- function(path) {
   path
 }
 
+has_rproj <- function(path) {
+  length(list.files(path = path, pattern = "\\.Rproj$")) > 0L
+}
+
+find_rproj <- function(path) {
+  path <- normalizePath(path, mustWork = FALSE)
+
+  while (!has_rproj(path)) {
+    path <- dirname(path)
+    if (is_root(path)) {
+      return(NULL)
+    }
+  }
+
+  list.files(path = path, pattern = "\\.Rproj$", full.names = TRUE)[1L]
+}
+
 is_root <- function(path) {
   identical(path, dirname(path))
 }
diff --git a/R/settings.R b/R/settings.R
index 113c20bca..555e12121 100644
--- a/R/settings.R
+++ b/R/settings.R
@@ -17,6 +17,11 @@ read_settings <- function(filename) {
   clear_settings()
 
   config_file <- find_config(filename)
+  default_encoding <- find_default_encoding(filename)
+  if (!is.null(default_encoding)) {
+    # Locally override the default for encoding if we found a smart default
+    default_settings[["encoding"]] <- default_encoding
+  }
 
   if (!is.null(config_file)) {
     f <- function(e) {
@@ -100,6 +105,44 @@ find_config <- function(filename) {
   NULL
 }
 
+find_default_encoding <- function(filename) {
+  if (is.null(filename)) {
+    return(NULL)
+  }
+
+  pkg_path <- find_package(filename)
+  if (!is.null(pkg_path)) {
+    # Get Encoding from DESCRIPTION
+    dcf <- tryCatch(
+      read.dcf(file.path(pkg_path, "DESCRIPTION")),
+      error = function(e) NULL
+    )
+    if (!is.null(dcf) && nrow(dcf) >= 1L && "Encoding" %in% colnames(dcf)) {
+      return(unname(dcf[1L, "Encoding"]))
+    }
+  }
+
+  rproj_file <- find_rproj(filename)
+  if (!is.null(rproj_file)) {
+    # Get Encoding from .Rproj
+    dcf <- tryCatch(
+      read.dcf(rproj_file),
+      error = function(e) NULL,
+      warning = function(e) NULL
+    )
+
+    if (!is.null(dcf) && nrow(dcf) >= 1L && "Encoding" %in% colnames(dcf)) {
+      encodings <- dcf[, "Encoding"]
+      encodings <- encodings[!is.na(encodings)]
+      if (length(encodings) > 0L) {
+        return(encodings[1L])
+      }
+    }
+  }
+
+  NULL
+}
+
 is_directory <- function(filename) {
   is_dir <- file.info(filename)$isdir
 
diff --git a/R/zzz.R b/R/zzz.R
index c7a37b1f5..c64baa876 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -200,6 +200,7 @@ settings <- NULL
 
   default_settings <<- list(
     linters = default_linters,
+    encoding = "UTF-8",
     exclude = rex::rex("#", any_spaces, "nolint"),
     exclude_start = rex::rex("#", any_spaces, "nolint start"),
     exclude_end = rex::rex("#", any_spaces, "nolint end"),

From 6aa3dd32c44b45ecde47cbfb6cf65f94422d0fac Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Wed, 17 Mar 2021 19:21:26 +0100
Subject: [PATCH 03/26] add ISO8859-1 tests for Rproj and packages

---
 R/settings.R                                  | 10 ++-
 .../dummy_packages/cp1252/.Rbuildignore       |  2 +
 .../dummy_packages/cp1252/DESCRIPTION         | 12 ++++
 .../testthat/dummy_packages/cp1252/NAMESPACE  |  1 +
 .../cp1252/R/metropolis-hastings-rho.R        | 71 +++++++++++++++++++
 .../dummy_packages/cp1252/cp1252.Rproj        | 20 ++++++
 .../project/default_linter_testcode.R         | 46 ++++++++++++
 .../project/metropolis-hastings-rho.R         | 71 +++++++++++++++++++
 .../dummy_projects/project/project.Rproj      | 16 +++++
 tests/testthat/test-settings.R                | 11 +++
 10 files changed, 259 insertions(+), 1 deletion(-)
 create mode 100644 tests/testthat/dummy_packages/cp1252/.Rbuildignore
 create mode 100644 tests/testthat/dummy_packages/cp1252/DESCRIPTION
 create mode 100644 tests/testthat/dummy_packages/cp1252/NAMESPACE
 create mode 100644 tests/testthat/dummy_packages/cp1252/R/metropolis-hastings-rho.R
 create mode 100644 tests/testthat/dummy_packages/cp1252/cp1252.Rproj
 create mode 100644 tests/testthat/dummy_projects/project/default_linter_testcode.R
 create mode 100644 tests/testthat/dummy_projects/project/metropolis-hastings-rho.R
 create mode 100644 tests/testthat/dummy_projects/project/project.Rproj

diff --git a/R/settings.R b/R/settings.R
index 555e12121..40ea790bb 100644
--- a/R/settings.R
+++ b/R/settings.R
@@ -111,6 +111,15 @@ find_default_encoding <- function(filename) {
   }
 
   pkg_path <- find_package(filename)
+  rproj_file <- find_rproj(filename)
+
+  if (!is.null(rproj_file) && !is.null(pkg_path) && startsWith(rproj_file, pkg_path)) {
+    # Check precedence via directory hierarchy.
+    # Both paths are normalized so checking if rproj_file is within pkg_path is sufficient.
+    # Force reading from Rproj file
+    pkg_path <- NULL
+  }
+
   if (!is.null(pkg_path)) {
     # Get Encoding from DESCRIPTION
     dcf <- tryCatch(
@@ -122,7 +131,6 @@ find_default_encoding <- function(filename) {
     }
   }
 
-  rproj_file <- find_rproj(filename)
   if (!is.null(rproj_file)) {
     # Get Encoding from .Rproj
     dcf <- tryCatch(
diff --git a/tests/testthat/dummy_packages/cp1252/.Rbuildignore b/tests/testthat/dummy_packages/cp1252/.Rbuildignore
new file mode 100644
index 000000000..91114bf2f
--- /dev/null
+++ b/tests/testthat/dummy_packages/cp1252/.Rbuildignore
@@ -0,0 +1,2 @@
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/tests/testthat/dummy_packages/cp1252/DESCRIPTION b/tests/testthat/dummy_packages/cp1252/DESCRIPTION
new file mode 100644
index 000000000..15288c4c4
--- /dev/null
+++ b/tests/testthat/dummy_packages/cp1252/DESCRIPTION
@@ -0,0 +1,12 @@
+Package: cp1252
+Type: Package
+Title: What the Package Does (Title Case)
+Version: 0.1.0
+Author: Who wrote it
+Maintainer: The package maintainer <yourself@somewhere.net>
+Description: More about what it does (maybe more than one line)
+    Use four spaces when indenting paragraphs within the Description.
+License: What license is it under?
+Encoding: UTF-8
+LazyData: true
+Roxygen: list(markdown = TRUE)
diff --git a/tests/testthat/dummy_packages/cp1252/NAMESPACE b/tests/testthat/dummy_packages/cp1252/NAMESPACE
new file mode 100644
index 000000000..d75f824ec
--- /dev/null
+++ b/tests/testthat/dummy_packages/cp1252/NAMESPACE
@@ -0,0 +1 @@
+exportPattern("^[[:alpha:]]+")
diff --git a/tests/testthat/dummy_packages/cp1252/R/metropolis-hastings-rho.R b/tests/testthat/dummy_packages/cp1252/R/metropolis-hastings-rho.R
new file mode 100644
index 000000000..456983eaf
--- /dev/null
+++ b/tests/testthat/dummy_packages/cp1252/R/metropolis-hastings-rho.R
@@ -0,0 +1,71 @@
+# Copied from the spatialprobit package https://raw.githubusercontent.com/cran/spatialprobit/master/R/metropolis-hastings-rho.R
+# This file is encoded in Cp-1252
+
+# @param beta parameter vektor (k x 1)
+# @param z imputed observed variables before truncation (n x 1)
+# @param W spatial weight matrix (n x n)
+# @param X design matrix (n x k)
+density_rho <- function(rho, beta, z, I_n, W, X, type=c("SAR","SEM")) {
+  A <- (I_n - rho * W)          # n x n
+  if (type == "SAR") {
+    S <- A %*% z - X %*% beta     # n x 1; Residuals of SAR
+  } else {
+    S <- as.numeric(A %*% (z - X %*% beta))   # n x 1; Residuals of SEM
+  }
+  as.numeric(det(A) * exp(-0.5 * t(S) %*% S))
+  #as.vector(exp(log(det(A)) - 0.5 * t(S) %*% S))
+}
+
+# Metropolis-Hastings-Chain with tuned acceptance rate, see LeSage (2009)
+#
+# proposal density is normal distribution
+# g(rho* | rho_t) = rho_t + c * N(0, 1)
+#
+# @param type "SEM" or "SAR",
+# @param n number of samples to draw
+# @param start.value start value for rho
+# @param c tuning parameter
+draw_rho_metropolis <- function(type="SEM", n, z, I_n, W, X, burn.in=100, start.value=0.2, c=1) {
+n <- n + burn.in
+u <- runif(n=n, 0, 1)  # samples for M-H-ratio
+s <- rnorm(n=n, 0, 1)  # realisations from proposal density
+rho_t <- numeric(n)    # vector for rho within the chain
+rho_t[1] <- start.value              # start value
+p_t      <- density_rho(rho_t[1], beta=beta, z=z, I_n=I_n, W=W, X=X, type=type)  # f(rho_t | beta, z)
+i    <- 2 # number of accepted proposals / length of chain
+acceptance_rate <- numeric(n) # running acceptance rate
+num_accepted <- 0
+while (i <= n) {
+
+  # create proposal rho_p from proposal density g(rho_p | rho_{t-1}) ~ N(rho_{t-1}, c^2)
+  rho_p <- rho_t[i-1] + c * s[i]   # proposal rho = letztes akzeptiertes Rho + normalverteilte Zufallsstreuung s
+
+  # Berechnung der Dichte f(rho_p) für das Proposal rho_p
+  p_p <- density_rho(rho_p, beta=beta, z=z, I_n=I_n, W=W, X=X, type=type)
+  # SW: Berechnung der Dichte ist teuer, daher besser für ein Grid vorher rechnen?
+  # Jain, Dichte p(rho|t,beta( hängt von z, beta ab. Aber zumindestens die Log-Determinante kann man vorher berechnen
+  # Berechnung Dichte f(rho_{t-1}) für letzten Wert der Chain
+  # p_t <- p(rho_t[i-1], beta=beta, z=z, W=W, X=X, type=type)  # SW: Kann ich die alte Dichte nicht merken, dann muss ich die nicht neu berechnen
+
+  # Wegen Symmetrie der Normalverteilung als Proposal-Dichte g(rho_p | rho_t) = g(rho_t | rho_p)
+  # vereinfacht sich das M-H-Ratio von [ f(rho_p) * g(rho_t | rho_p) ] / [ f(rho_t) * g(rho_p | rho_t) ]
+  # auf f(rho_p) / f(rho_t)!
+  # determine M-H-Ratio R(rho_p, rho_t) = min[ 1 ; p(rho_p) / p(rho_t) ]
+  # see LeSage (2009), eqn (5.27)
+  R <- min(1, p_p / p_t)
+  if (u[i] <= R) {    # see Givens/Hoeting, p.184, eqn (7.2)
+    # accept proposal
+    rho_t[i] <- rho_p
+    p_t <- p_p  # save density
+    num_accepted <- num_accepted + 1
+  } else {
+    # stay with the current value
+    rho_t[i] <- rho_t[i-1]
+  }
+  acceptance_rate[i] <- num_accepted / i
+  if (acceptance_rate[i] < 0.4) c <- c / 1.1   # Wenn Akzeptanzrate zu klein, dann verringere (?) Streuungsparameter "c"
+  if (acceptance_rate[i] > 0.6) c <- c * 1.1
+  i <- i + 1
+}
+return(list(rho_t=rho_t,acceptance_rate=acceptance_rate))
+}
diff --git a/tests/testthat/dummy_packages/cp1252/cp1252.Rproj b/tests/testthat/dummy_packages/cp1252/cp1252.Rproj
new file mode 100644
index 000000000..c7aebb9a4
--- /dev/null
+++ b/tests/testthat/dummy_packages/cp1252/cp1252.Rproj
@@ -0,0 +1,20 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: ISO8859-1
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
diff --git a/tests/testthat/dummy_projects/project/default_linter_testcode.R b/tests/testthat/dummy_projects/project/default_linter_testcode.R
new file mode 100644
index 000000000..ba7ba7cca
--- /dev/null
+++ b/tests/testthat/dummy_projects/project/default_linter_testcode.R
@@ -0,0 +1,46 @@
+# Each of the default linters should throw at least one lint on this file
+
+# assignment
+# function_left_parentheses
+# closed_curly
+# commas
+# paren_brace
+f = function (x,y = 1){}
+
+# commented_code
+# some <- commented("out code")
+
+# cyclocomp
+# equals_na
+# infix_spaces
+# line_length
+# object_length
+# object_name
+# object_usage
+# open_curly
+# T_and_F_symbol
+someComplicatedFunctionWithALongCamelCaseName <- function(x)
+{
+  y <- 1
+  if (1 > 2 && 2 > 3 && 3 > 4 && 4 > 5 && 5*10 > 6 && 5 > 6 && 6 > 7 && x == NA) {T} else {F}
+}
+
+# no_tab
+# pipe_continuation
+# seq_linter
+# spaces_inside
+x <- 1:10
+x[ 2]
+1:length(x) %>% lapply(function(x) x*2) %>%
+	head()
+
+# single_quotes
+message('single_quotes')
+
+# spaces_left_parentheses
+# trailing_whitespace
+# semicolon_terminator
+x <- 42; y <- 2 +(1:10) 
+
+# trailing_blank_lines
+
diff --git a/tests/testthat/dummy_projects/project/metropolis-hastings-rho.R b/tests/testthat/dummy_projects/project/metropolis-hastings-rho.R
new file mode 100644
index 000000000..aba63b6ed
--- /dev/null
+++ b/tests/testthat/dummy_projects/project/metropolis-hastings-rho.R
@@ -0,0 +1,71 @@
+# Copied from https://raw.githubusercontent.com/cran/spatialprobit/master/R/metropolis-hastings-rho.R
+# This file is encoded in Cp-1252
+
+# @param beta parameter vektor (k x 1)
+# @param z imputed observed variables before truncation (n x 1)
+# @param W spatial weight matrix (n x n)
+# @param X design matrix (n x k)
+density_rho <- function(rho, beta, z, I_n, W, X, type=c("SAR","SEM")) {
+  A <- (I_n - rho * W)          # n x n
+  if (type == "SAR") {
+    S <- A %*% z - X %*% beta     # n x 1; Residuals of SAR
+  } else {
+    S <- as.numeric(A %*% (z - X %*% beta))   # n x 1; Residuals of SEM
+  }
+  as.numeric(det(A) * exp(-0.5 * t(S) %*% S))
+  #as.vector(exp(log(det(A)) - 0.5 * t(S) %*% S))
+}
+
+# Metropolis-Hastings-Chain with tuned acceptance rate, see LeSage (2009)
+# 
+# proposal density is normal distribution
+# g(rho* | rho_t) = rho_t + c * N(0, 1)
+#
+# @param type "SEM" or "SAR",
+# @param n number of samples to draw
+# @param start.value start value for rho
+# @param c tuning parameter
+draw_rho_metropolis <- function(type="SEM", n, z, I_n, W, X, burn.in=100, start.value=0.2, c=1) {
+n <- n + burn.in
+u <- runif(n=n, 0, 1)  # samples for M-H-ratio
+s <- rnorm(n=n, 0, 1)  # realisations from proposal density
+rho_t <- numeric(n)    # vector for rho within the chain
+rho_t[1] <- start.value              # start value
+p_t      <- density_rho(rho_t[1], beta=beta, z=z, I_n=I_n, W=W, X=X, type=type)  # f(rho_t | beta, z)
+i    <- 2 # number of accepted proposals / length of chain
+acceptance_rate <- numeric(n) # running acceptance rate
+num_accepted <- 0
+while (i <= n) {
+
+  # create proposal rho_p from proposal density g(rho_p | rho_{t-1}) ~ N(rho_{t-1}, c^2)
+  rho_p <- rho_t[i-1] + c * s[i]   # proposal rho = letztes akzeptiertes Rho + normalverteilte Zufallsstreuung s
+  
+  # Berechnung der Dichte f(rho_p) für das Proposal rho_p
+  p_p <- density_rho(rho_p, beta=beta, z=z, I_n=I_n, W=W, X=X, type=type)       
+  # SW: Berechnung der Dichte ist teuer, daher besser für ein Grid vorher rechnen?
+  # Jain, Dichte p(rho|t,beta( hängt von z, beta ab. Aber zumindestens die Log-Determinante kann man vorher berechnen
+  # Berechnung Dichte f(rho_{t-1}) für letzten Wert der Chain
+  # p_t <- p(rho_t[i-1], beta=beta, z=z, W=W, X=X, type=type)  # SW: Kann ich die alte Dichte nicht merken, dann muss ich die nicht neu berechnen
+
+  # Wegen Symmetrie der Normalverteilung als Proposal-Dichte g(rho_p | rho_t) = g(rho_t | rho_p)
+  # vereinfacht sich das M-H-Ratio von [ f(rho_p) * g(rho_t | rho_p) ] / [ f(rho_t) * g(rho_p | rho_t) ]
+  # auf f(rho_p) / f(rho_t)!
+  # determine M-H-Ratio R(rho_p, rho_t) = min[ 1 ; p(rho_p) / p(rho_t) ]
+  # see LeSage (2009), eqn (5.27)
+  R <- min(1, p_p / p_t)
+  if (u[i] <= R) {    # see Givens/Hoeting, p.184, eqn (7.2)
+    # accept proposal
+    rho_t[i] <- rho_p
+    p_t <- p_p  # save density
+    num_accepted <- num_accepted + 1
+  } else {
+    # stay with the current value
+    rho_t[i] <- rho_t[i-1]
+  }
+  acceptance_rate[i] <- num_accepted / i
+  if (acceptance_rate[i] < 0.4) c <- c / 1.1   # Wenn Akzeptanzrate zu klein, dann verringere (?) Streuungsparameter "c"
+  if (acceptance_rate[i] > 0.6) c <- c * 1.1
+  i <- i + 1
+}
+return(list(rho_t=rho_t,acceptance_rate=acceptance_rate)) 
+}
\ No newline at end of file
diff --git a/tests/testthat/dummy_projects/project/project.Rproj b/tests/testthat/dummy_projects/project/project.Rproj
new file mode 100644
index 000000000..e856a0a49
--- /dev/null
+++ b/tests/testthat/dummy_projects/project/project.Rproj
@@ -0,0 +1,16 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: ISO8859-1
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
diff --git a/tests/testthat/test-settings.R b/tests/testthat/test-settings.R
index ce23dabee..e653af01e 100644
--- a/tests/testthat/test-settings.R
+++ b/tests/testthat/test-settings.R
@@ -124,3 +124,14 @@ test_that("with_defaults doesn't break on very long input", {
     "lintr::undesirable_function_linter"
   )
 })
+
+test_that("it has a smart default for encodings", {
+  read_settings(NULL)
+  expect_equal(settings$encoding, "UTF-8")
+
+  read_settings("dummy_projects/project/metropolis-hastings-rho.R")
+  expect_equal(settings$encoding, "ISO8859-1")
+
+  read_settings("dummy_packages/cp1252/R/metropolis-hastings-rho.R")
+  expect_equal(settings$encoding, "ISO8859-1")
+})
\ No newline at end of file

From 4ac90b5e953d71aa63a516e503f126da560c232b Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Mon, 14 Jun 2021 23:15:46 +0200
Subject: [PATCH 04/26] handle encoding in read_lines()

handle invalid encoding in parse_exclusions() and get_source_file()
suppress linting of file if invalid encoding detected

also had to fix deprecated warnings caused by R 4.1.0 and testthat::expect_equal() with functions
---
 R/exclude.R                    |  6 ++++
 R/get_source_expressions.R     | 66 ++++++++++++++++++++++++----------
 R/utils.R                      |  4 +--
 tests/testthat/test-settings.R |  6 ++--
 4 files changed, 59 insertions(+), 23 deletions(-)

diff --git a/R/exclude.R b/R/exclude.R
index fff816835..17feceeff 100644
--- a/R/exclude.R
+++ b/R/exclude.R
@@ -79,6 +79,12 @@ parse_exclusions <- function(file, exclude = settings$exclude,
 
   exclusions <- list()
 
+  e <- tryCatch(nchar(lines), error = identity)
+  if (inherits(e, "error")) {
+    # Invalid encoding. Don't parse exclusions.
+    return(list())
+  }
+
   start_locations <- rex::re_matches(lines, exclude_start, locations = TRUE)[, "end"] + 1L
   starts <- which(!is.na(start_locations))
   ends <- which(rex::re_matches(lines, exclude_end))
diff --git a/R/get_source_expressions.R b/R/get_source_expressions.R
index 1ad8d3f5c..9643acf80 100644
--- a/R/get_source_expressions.R
+++ b/R/get_source_expressions.R
@@ -77,6 +77,20 @@ get_source_expressions <- function(filename, lines = NULL) {
     # an error that does not use R_ParseErrorMsg
     if (is.na(message_info$line)) {
 
+      if (grepl("invalid multibyte string, element", e$message, fixed = TRUE)) {
+        # Invalid encoding, will break even re_matches() below, so we need to handle this first.
+        return(
+          Lint(
+            filename = source_file$filename,
+            line_number = 1L,
+            column_number = 1L,
+            type = "error",
+            message = "Invalid multibyte string. Is the encoding correct?",
+            line = ""
+          )
+        )
+      }
+
       message_info <- re_matches(e$message,
         rex(single_quotes, capture(name = "name", anything), single_quotes,
           anything,
@@ -185,26 +199,31 @@ get_source_expressions <- function(filename, lines = NULL) {
   parsed_content <- get_source_file(source_file, error = lint_error)
   tree <- generate_tree(parsed_content)
 
-  expressions <- lapply(
-    X = top_level_expressions(parsed_content),
-    FUN = get_single_source_expression,
-    parsed_content,
-    source_file,
-    filename,
-    tree
-  )
-
-  # add global expression
-  expressions[[length(expressions) + 1L]] <-
-    list(
-      filename = filename,
-      file_lines = source_file$lines,
-      content = source_file$lines,
-      full_parsed_content = parsed_content,
-      full_xml_parsed_content = safe_parse_to_xml(parsed_content),
-      terminal_newline = terminal_newline
+  if (inherits(e, "lint") && !nzchar(e$line)) {
+    # Don't create expression list if it's unreliable (invalid encoding or unhandled parse error)
+    expressions <- NULL
+  } else {
+    expressions <- lapply(
+      X = top_level_expressions(parsed_content),
+      FUN = get_single_source_expression,
+      parsed_content,
+      source_file,
+      filename,
+      tree
     )
 
+    # add global expression
+    expressions[[length(expressions) + 1L]] <-
+      list(
+        filename = filename,
+        file_lines = source_file$lines,
+        content = source_file$lines,
+        full_parsed_content = parsed_content,
+        full_xml_parsed_content = safe_parse_to_xml(parsed_content),
+        terminal_newline = terminal_newline
+      )
+  }
+
   list(expressions = expressions, error = e, lines = source_file$lines)
 }
 
@@ -252,6 +271,17 @@ get_source_file <- function(source_file, error = identity) {
     assign("e", e,  envir = parent.frame())
   }
 
+  # Triggers an error if the lines contain invalid characters.
+  e <- tryCatch(
+    nchar(source_file$content, type = "chars"),
+    error = error
+  )
+
+  if (inherits(e, "error") || inherits(e, "lint")) {
+    assign("e", e,  envir = parent.frame())
+    return() # parsed_content is unreliable if encoding is invalid
+  }
+
   fix_eq_assigns(fix_column_numbers(fix_tab_indentations(source_file)))
 }
 
diff --git a/R/utils.R b/R/utils.R
index c927f71d0..3c99c3661 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -250,10 +250,10 @@ Linter <- function(fun, name = linter_auto_name()) { # nolint: object_name_linte
   structure(fun, class = "linter", name = name)
 }
 
-read_lines <- function(file, ...) {
+read_lines <- function(file, encoding = settings$encoding, ...) {
   terminal_newline <- TRUE
   lines <- withCallingHandlers({
-    readLines(file, warn = TRUE, ...)
+    readLines(file, warn = TRUE, encoding = encoding, ...)
   },
   warning = function(w) {
     if (grepl("incomplete final line found on", w$message, fixed = TRUE)) {
diff --git a/tests/testthat/test-settings.R b/tests/testthat/test-settings.R
index e653af01e..dfdc5ca29 100644
--- a/tests/testthat/test-settings.R
+++ b/tests/testthat/test-settings.R
@@ -3,7 +3,7 @@ test_that("it uses default settings if none provided", {
   read_settings(NULL)
 
   lapply(ls(settings), function(setting) {
-    expect_equal(settings[[setting]], default_settings[[setting]])
+    expect_identical(settings[[setting]], default_settings[[setting]])
   })
 })
 
@@ -32,7 +32,7 @@ test_that("it uses config settings in same directory if provided", {
   read_settings(file)
 
   lapply(setdiff(ls(settings), "exclude"), function(setting) {
-    expect_equal(settings[[setting]], default_settings[[setting]])
+    expect_identical(settings[[setting]], default_settings[[setting]])
   })
 
   expect_equal(settings$exclude, "test")
@@ -58,7 +58,7 @@ test_that("it uses config home directory settings if provided", {
   withr::with_envvar(c(HOME = home_path), read_settings(file))
 
   lapply(setdiff(ls(settings), "exclude"), function(setting) {
-    expect_equal(settings[[setting]], default_settings[[setting]])
+    expect_identical(settings[[setting]], default_settings[[setting]])
   })
 
   expect_equal(settings$exclude, "test")

From 893cdb5e6674e77072494e0956aa6ee3e213a5c8 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Wed, 30 Jun 2021 18:33:29 +0200
Subject: [PATCH 05/26] docment()

---
 NAMESPACE                      |  1 +
 R/get_source_expressions.R     |  2 +-
 man/default_settings.Rd        |  2 +-
 man/get_source_expressions.Rd  |  4 ++--
 man/linters.Rd                 | 11 ++++++++---
 tests/testthat/test-settings.R |  6 +++---
 6 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index b977c3bd4..becf3a8eb 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -52,6 +52,7 @@ export(object_name_linter)
 export(object_usage_linter)
 export(open_curly_linter)
 export(paren_brace_linter)
+export(pipe_call_linter)
 export(pipe_continuation_linter)
 export(semicolon_terminator_linter)
 export(seq_linter)
diff --git a/R/get_source_expressions.R b/R/get_source_expressions.R
index 9643acf80..a22ae07f6 100644
--- a/R/get_source_expressions.R
+++ b/R/get_source_expressions.R
@@ -42,7 +42,7 @@
 #' @export
 #' @md
 get_source_expressions <- function(filename, lines = NULL) {
-  source_file <- srcfile(filename)
+  source_file <- srcfile(filename, encoding = settings$encoding)
 
   # Ensure English locale for terminal newline and zero-length variable warning messages
   old_lang <- set_lang("en")
diff --git a/man/default_settings.Rd b/man/default_settings.Rd
index a9bdc2d1e..032bc5a7d 100644
--- a/man/default_settings.Rd
+++ b/man/default_settings.Rd
@@ -5,7 +5,7 @@
 \alias{default_settings}
 \title{Default lintr settings}
 \format{
-An object of class \code{list} of length 11.
+An object of class \code{list} of length 12.
 }
 \usage{
 default_settings
diff --git a/man/get_source_expressions.Rd b/man/get_source_expressions.Rd
index a7b3950af..f32ed9f78 100644
--- a/man/get_source_expressions.Rd
+++ b/man/get_source_expressions.Rd
@@ -35,7 +35,7 @@ The final element of \code{expressions} is a list corresponding to the full file
 consisting of 6 elements:
 \itemize{
 \item{\code{filename} (\code{character})}
-\item{\code{file_lines} (\code{character}) the \code{\link[=readLines]{readLines()}} output for this file}
+\item{\code{file_lines} (\code{character}) the \code{\link[=readLines]{readLines()}} output for this file} # TODO document encoding
 \item{\code{content} (\code{character}) for .R files, the same as \code{file_lines};
 for .Rmd scripts, this is the extracted R source code (as text)}
 \item{\code{full_parsed_content} (\code{data.frame}) as given by
@@ -47,7 +47,7 @@ newline (as determined by \code{\link[=readLines]{readLines()}} producing a corr
 }
 }
 \item{error}{A \code{Lint} object describing any parsing error.}
-\item{lines}{The \code{\link[=readLines]{readLines()}} output for this file.}
+\item{lines}{The \code{\link[=readLines]{readLines()}} output for this file.} # TODO document encoding
 }
 \description{
 This object is given as input to each linter
diff --git a/man/linters.Rd b/man/linters.Rd
index 793e726cb..cd05b469a 100644
--- a/man/linters.Rd
+++ b/man/linters.Rd
@@ -6,9 +6,9 @@
 %   R/extraction_operator_linter.R, R/function_left_parentheses.R,
 %   R/implicit_integer_linter.R, R/infix_spaces_linter.R,
 %   R/line_length_linter.R, R/missing_argument_linter.R,
-%   R/missing_package_linter.R, R/namespace_linter.R,
-%   R/no_tab_linter.R, R/object_usage_linter.R, R/open_curly_linter.R,
-%   R/paren_brace_linter.R, R/path_linters.R, R/pipe_continuation_linter.R,
+%   R/missing_package_linter.R, R/namespace_linter.R, R/no_tab_linter.R,
+%   R/object_usage_linter.R, R/open_curly_linter.R, R/paren_brace_linter.R,
+%   R/path_linters.R, R/pipe_call_linter.R, R/pipe_continuation_linter.R,
 %   R/semicolon_terminator_linter.R, R/seq_linter.R, R/single_quotes_linter.R,
 %   R/spaces_inside_linter.R, R/spaces_left_parentheses_linter.R,
 %   R/sprintf_linter.R, R/trailing_blank_lines_linter.R,
@@ -42,6 +42,7 @@
 \alias{paren_brace_linter}
 \alias{absolute_path_linter}
 \alias{nonportable_path_linter}
+\alias{pipe_call_linter}
 \alias{pipe_continuation_linter}
 \alias{semicolon_terminator_linter}
 \alias{seq_linter}
@@ -108,6 +109,8 @@ absolute_path_linter(lax = TRUE)
 
 nonportable_path_linter(lax = TRUE)
 
+pipe_call_linter()
+
 pipe_continuation_linter()
 
 semicolon_terminator_linter(semicolon = c("compound", "trailing"))
@@ -261,6 +264,8 @@ parenthesis and an opening curly brace.
 
 \item \code{nonportable_path_linter}: Check that file.path() is used to construct safe and portable paths.
 
+\item \code{pipe_call_linter}: that forces explicit calls in magrittr pipes
+
 \item \code{pipe_continuation_linter}: Check that each step in a pipeline is on a new line, or
 the entire pipe fits on one line.
 
diff --git a/tests/testthat/test-settings.R b/tests/testthat/test-settings.R
index dfdc5ca29..207079969 100644
--- a/tests/testthat/test-settings.R
+++ b/tests/testthat/test-settings.R
@@ -129,9 +129,9 @@ test_that("it has a smart default for encodings", {
   read_settings(NULL)
   expect_equal(settings$encoding, "UTF-8")
 
-  read_settings("dummy_projects/project/metropolis-hastings-rho.R")
+  read_settings(file.path("dummy_projects", "project", "metropolis-hastings-rho.R"))
   expect_equal(settings$encoding, "ISO8859-1")
 
-  read_settings("dummy_packages/cp1252/R/metropolis-hastings-rho.R")
+  read_settings(file.path("dummy_packages", "cp1252", "R", "metropolis-hastings-rho.R"))
   expect_equal(settings$encoding, "ISO8859-1")
-})
\ No newline at end of file
+})

From ca777c53e16f22ad9e64e62db6cab8aa3181e5e0 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Wed, 30 Jun 2021 19:07:42 +0200
Subject: [PATCH 06/26] try disabling fix_column_numbers() and getting more
 info on test failures

---
 .lintr                         |  1 +
 R/get_source_expressions.R     | 27 +++++++++++++++------------
 tests/testthat/test-settings.R |  6 ++++++
 3 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/.lintr b/.lintr
index 639fe9f22..7fa51541a 100644
--- a/.lintr
+++ b/.lintr
@@ -7,6 +7,7 @@ exclusions: list(
   "inst/example/bad.R",
   "tests/testthat/default_linter_testcode.R",
   "tests/testthat/dummy_packages",
+  "tests/testthat/dummy_projects",
   "tests/testthat/exclusions-test",
   "tests/testthat/knitr_formats",
   "tests/testthat/knitr_malformed"
diff --git a/R/get_source_expressions.R b/R/get_source_expressions.R
index a22ae07f6..062b9f804 100644
--- a/R/get_source_expressions.R
+++ b/R/get_source_expressions.R
@@ -324,11 +324,14 @@ fix_column_numbers <- function(content) {
     return(NULL)
   }
 
+  # Temporary: Apparently this is no longer necessary.
+  return(content)
+
   text_lengths <- nchar(content$text[content$terminal], "chars")
   byte_lengths <- nchar(content$text[content$terminal], "bytes")
   differences <- byte_lengths - text_lengths
 
-  to_change <- which(differences > 0L)
+  to_change <- which(content$terminal[differences > 0L])
 
   adjusted_col1 <- content$col1
   adjusted_col2 <- content$col2
@@ -337,17 +340,17 @@ fix_column_numbers <- function(content) {
     needs_adjustment <- which(content$line1 == content$line1[i] & content$col1 >= content$col1[i])
 
     for (j in needs_adjustment) {
-      adjusted_col1[j] <-
-        content$col1[j] -
-          sum(differences[
-            content$line1 == content$line1[j] &
-            content$col1 < content$col1[j]])
-
-      adjusted_col2[j] <-
-        content$col2[j] -
-          sum(differences[
-            content$line1 == content$line1[j] &
-            content$col2 < content$col2[j]])
+      adjusted_col1[j] <- content$col1[j] -
+        sum(differences[
+              content$line1[content$terminal] == content$line1[j] &
+                content$col1[content$terminal] < content$col1[j]
+            ])
+
+      adjusted_col2[j] <- content$col2[j] -
+        sum(differences[
+              content$line1[content$terminal] == content$line1[j] &
+                content$col2[content$terminal] <= content$col2[j]
+            ])
     }
   }
   content$col1 <- adjusted_col1
diff --git a/tests/testthat/test-settings.R b/tests/testthat/test-settings.R
index 207079969..c9ee86e97 100644
--- a/tests/testthat/test-settings.R
+++ b/tests/testthat/test-settings.R
@@ -129,6 +129,12 @@ test_that("it has a smart default for encodings", {
   read_settings(NULL)
   expect_equal(settings$encoding, "UTF-8")
 
+  ex1 <- file.exists(file.path("dummy_projects", "project", "metropolis-hastings-rho.R"))
+  ex2 <- file.exists(file.path("dummy_packages", "cp1252", "R", "metropolis-hastings-rho.R"))
+  if (!ex1 | !ex2) {
+    stop("Encoding test files do not exist (", ex1, ", ", ex2, "). getwd() = '", getwd(), "'.")
+  }
+
   read_settings(file.path("dummy_projects", "project", "metropolis-hastings-rho.R"))
   expect_equal(settings$encoding, "ISO8859-1")
 

From 4b8b9a73c2c547333d821bb49e23074c60865df3 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Wed, 30 Jun 2021 19:45:36 +0200
Subject: [PATCH 07/26] try explicitly testing find_default_encoding()

---
 tests/testthat/test-settings.R | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/testthat/test-settings.R b/tests/testthat/test-settings.R
index c9ee86e97..f55aab2b1 100644
--- a/tests/testthat/test-settings.R
+++ b/tests/testthat/test-settings.R
@@ -129,15 +129,15 @@ test_that("it has a smart default for encodings", {
   read_settings(NULL)
   expect_equal(settings$encoding, "UTF-8")
 
-  ex1 <- file.exists(file.path("dummy_projects", "project", "metropolis-hastings-rho.R"))
-  ex2 <- file.exists(file.path("dummy_packages", "cp1252", "R", "metropolis-hastings-rho.R"))
-  if (!ex1 | !ex2) {
-    stop("Encoding test files do not exist (", ex1, ", ", ex2, "). getwd() = '", getwd(), "'.")
-  }
+  proj_file <- file.path("dummy_projects", "project", "metropolis-hastings-rho.R")
+  pkg_file <- file.path("dummy_packages", "cp1252", "R", "metropolis-hastings-rho.R")
 
-  read_settings(file.path("dummy_projects", "project", "metropolis-hastings-rho.R"))
+  expect_equal(find_default_encoding(proj_file), "ISO8859-1")
+  expect_equal(find_default_encoding(pkg_file), "ISO8859-1")
+
+  read_settings(proj_file)
   expect_equal(settings$encoding, "ISO8859-1")
 
-  read_settings(file.path("dummy_packages", "cp1252", "R", "metropolis-hastings-rho.R"))
+  read_settings(pkg_file)
   expect_equal(settings$encoding, "ISO8859-1")
 })

From c3f1388ff4d8610183531dee89b8c2895ac1d9b9 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Wed, 30 Jun 2021 19:55:48 +0200
Subject: [PATCH 08/26] test even deeper

---
 tests/testthat/test-settings.R | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/testthat/test-settings.R b/tests/testthat/test-settings.R
index f55aab2b1..1462f78d9 100644
--- a/tests/testthat/test-settings.R
+++ b/tests/testthat/test-settings.R
@@ -132,6 +132,12 @@ test_that("it has a smart default for encodings", {
   proj_file <- file.path("dummy_projects", "project", "metropolis-hastings-rho.R")
   pkg_file <- file.path("dummy_packages", "cp1252", "R", "metropolis-hastings-rho.R")
 
+  expect_false("encoding" %in% colnames(read.dcf(find_config(proj_file))))
+  expect_false("encoding" %in% colnames(read.dcf(find_config(pkg_file))))
+
+  expect_type(find_rproj(proj_file), "character")
+  expect_type(find_package(pkg_file), "character")
+
   expect_equal(find_default_encoding(proj_file), "ISO8859-1")
   expect_equal(find_default_encoding(pkg_file), "ISO8859-1")
 

From d85219776d6f2d408e8bb4ba6ad1cea74344b9f1 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Wed, 30 Jun 2021 20:05:08 +0200
Subject: [PATCH 09/26] check actual config file names

---
 tests/testthat/test-settings.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/testthat/test-settings.R b/tests/testthat/test-settings.R
index 1462f78d9..7ea4bf14e 100644
--- a/tests/testthat/test-settings.R
+++ b/tests/testthat/test-settings.R
@@ -135,8 +135,8 @@ test_that("it has a smart default for encodings", {
   expect_false("encoding" %in% colnames(read.dcf(find_config(proj_file))))
   expect_false("encoding" %in% colnames(read.dcf(find_config(pkg_file))))
 
-  expect_type(find_rproj(proj_file), "character")
-  expect_type(find_package(pkg_file), "character")
+  expect_equal(find_rproj(proj_file), normalizePath(file.path("dummy_projects", "project", "project.Rproj")))
+  expect_equal(find_package(pkg_file), normalizePath(file.path("dummy_packages", "cp1252")))
 
   expect_equal(find_default_encoding(proj_file), "ISO8859-1")
   expect_equal(find_default_encoding(pkg_file), "ISO8859-1")

From 088d0158eedc96a5125250f81046639de562f3fd Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Wed, 30 Jun 2021 20:20:15 +0200
Subject: [PATCH 10/26] try tweaking .Rbuildignore

---
 .Rbuildignore                  | 2 +-
 tests/testthat/test-settings.R | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/.Rbuildignore b/.Rbuildignore
index 44104f43b..98837c7c5 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -6,7 +6,7 @@
 ^travis-tool\.sh$
 ^.*\.gz$
 ^cran-comments\.md$
-^.*\.Rproj$
+^lintr\.Rproj$
 ^\.Rproj\.user$
 ^\.idea$
 ^\.dev$
diff --git a/tests/testthat/test-settings.R b/tests/testthat/test-settings.R
index 7ea4bf14e..ed970134b 100644
--- a/tests/testthat/test-settings.R
+++ b/tests/testthat/test-settings.R
@@ -132,9 +132,6 @@ test_that("it has a smart default for encodings", {
   proj_file <- file.path("dummy_projects", "project", "metropolis-hastings-rho.R")
   pkg_file <- file.path("dummy_packages", "cp1252", "R", "metropolis-hastings-rho.R")
 
-  expect_false("encoding" %in% colnames(read.dcf(find_config(proj_file))))
-  expect_false("encoding" %in% colnames(read.dcf(find_config(pkg_file))))
-
   expect_equal(find_rproj(proj_file), normalizePath(file.path("dummy_projects", "project", "project.Rproj")))
   expect_equal(find_package(pkg_file), normalizePath(file.path("dummy_packages", "cp1252")))
 

From 3441e2fe2f5def4f5c5d9283e8bf9b2e41967315 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Wed, 30 Jun 2021 20:40:10 +0200
Subject: [PATCH 11/26] fix test for windows, fix R CMD check NOTE, fix new
 regression test (expects parse error instead of encoding error to be
 reported)

---
 .Rbuildignore                  |  1 +
 R/get_source_expressions.R     |  5 ++++-
 tests/testthat/test-settings.R | 10 ++++++++--
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/.Rbuildignore b/.Rbuildignore
index 98837c7c5..986e8cc38 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -24,3 +24,4 @@
 ^bench$
 ^tests/testthat/dummy_packages/package/[.]Rbuildignore$
 ^tests/testthat/dummy_packages/package/[.]lintr$
+^tests/testthat/dummy_packages/cp1252/[.]Rbuildignore$
diff --git a/R/get_source_expressions.R b/R/get_source_expressions.R
index 062b9f804..7c7dbe6c0 100644
--- a/R/get_source_expressions.R
+++ b/R/get_source_expressions.R
@@ -254,6 +254,7 @@ get_single_source_expression <- function(loc,
 }
 
 get_source_file <- function(source_file, error = identity) {
+  parse_error <- FALSE
 
   e <- tryCatch(
     source_file$parsed_content <- parse(text = source_file$content, srcfile = source_file, keep.source = TRUE),
@@ -269,6 +270,7 @@ get_source_file <- function(source_file, error = identity) {
 
   if (inherits(e, "error") || inherits(e, "lint")) {
     assign("e", e,  envir = parent.frame())
+    parse_error <- TRUE
   }
 
   # Triggers an error if the lines contain invalid characters.
@@ -278,7 +280,8 @@ get_source_file <- function(source_file, error = identity) {
   )
 
   if (inherits(e, "error") || inherits(e, "lint")) {
-    assign("e", e,  envir = parent.frame())
+    # Let parse errors take precedence over encoding problems
+    if (!parse_error) assign("e", e,  envir = parent.frame())
     return() # parsed_content is unreliable if encoding is invalid
   }
 
diff --git a/tests/testthat/test-settings.R b/tests/testthat/test-settings.R
index ed970134b..bd6ab51bc 100644
--- a/tests/testthat/test-settings.R
+++ b/tests/testthat/test-settings.R
@@ -132,8 +132,14 @@ test_that("it has a smart default for encodings", {
   proj_file <- file.path("dummy_projects", "project", "metropolis-hastings-rho.R")
   pkg_file <- file.path("dummy_packages", "cp1252", "R", "metropolis-hastings-rho.R")
 
-  expect_equal(find_rproj(proj_file), normalizePath(file.path("dummy_projects", "project", "project.Rproj")))
-  expect_equal(find_package(pkg_file), normalizePath(file.path("dummy_packages", "cp1252")))
+  expect_equal(
+    find_rproj(proj_file),
+    normalizePath(file.path("dummy_projects", "project", "project.Rproj"), winslash = "/")
+  )
+  expect_equal(
+    find_package(pkg_file),
+    normalizePath(file.path("dummy_packages", "cp1252"), winslash = "/")
+  )
 
   expect_equal(find_default_encoding(proj_file), "ISO8859-1")
   expect_equal(find_default_encoding(pkg_file), "ISO8859-1")

From 52dca5c4c80993c54b9f5ca120f46f5e5dab794f Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Wed, 30 Jun 2021 22:36:44 +0200
Subject: [PATCH 12/26] properly supply encoding to readLines()

---
 .dev/compare_branches.R                      |  6 +++++-
 R/cache.R                                    |  4 +++-
 R/get_source_expressions.R                   | 14 ++++++++++++--
 R/utils.R                                    |  4 +++-
 tests/testthat/test-get_source_expressions.R |  6 +++++-
 5 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/.dev/compare_branches.R b/.dev/compare_branches.R
index fa5824334..a7665661a 100755
--- a/.dev/compare_branches.R
+++ b/.dev/compare_branches.R
@@ -146,7 +146,11 @@ test_encoding <- function(dir) {
   tryCatch({
     lapply(
       list.files(dir, pattern = "(?i)\\.r(?:md)?$", recursive = TRUE, full.names = TRUE),
-      function(x) nchar(readLines(x, warn = FALSE)) # TODO respect encoding
+      function(x) {
+        con <- file(x, encoding = lintr:::find_default_encoding(x))
+        on.exit(close(con))
+        nchar(readLines(con, warn = FALSE))
+      }
     )
     FALSE
   }, error = function(x) TRUE)
diff --git a/R/cache.R b/R/cache.R
index 9a517e1d8..cf09b0cb5 100644
--- a/R/cache.R
+++ b/R/cache.R
@@ -119,7 +119,9 @@ digest_content <- function(linters, obj) {
     list(linters, obj$content, is.null(obj$parsed_content))
   } else {
     # assume a filename
-    list(linters, readLines(obj)) # TODO respect encoding
+    con <- file(obj, encoding = settings$encoding)
+    on.exit(close(con))
+    list(linters, readLines(con))
   }
   digest::digest(content, algo = "sha1")
 }
diff --git a/R/get_source_expressions.R b/R/get_source_expressions.R
index 7c7dbe6c0..04effb687 100644
--- a/R/get_source_expressions.R
+++ b/R/get_source_expressions.R
@@ -1,6 +1,16 @@
 #' Parsed sourced file from a filename
 #'
 #' This object is given as input to each linter
+#'
+#' @details
+#' The file is read in using the `encoding` setting.
+#' This setting found by taking the first valid result from the following locations
+#'
+#' 1. The `encoding` key from the usual lintr configuration settings.
+#' 2. The `Encoding` field from a Package `DESCRIPTION` file in a parent directory.
+#' 3. The `Encoding` field from an R Project `.Rproj` file in a parent directory.
+#' 4. `"UTF-8"` as a fallback.
+#'
 #' @param filename the file to be parsed.
 #' @param lines a character vector of lines.
 #'   If \code{NULL}, then \code{filename} will be read.
@@ -26,7 +36,7 @@
 #'   consisting of 6 elements:
 #'   \itemize{
 #'     \item{`filename` (`character`)}
-#'     \item{`file_lines` (`character`) the [readLines()] output for this file} # TODO document encoding
+#'     \item{`file_lines` (`character`) the [readLines()] output for this file}
 #'     \item{`content` (`character`) for .R files, the same as `file_lines`;
 #'           for .Rmd scripts, this is the extracted R source code (as text)}
 #'     \item{`full_parsed_content` (`data.frame`) as given by
@@ -38,7 +48,7 @@
 #'   }
 #'   }
 #'   \item{error}{A `Lint` object describing any parsing error.}
-#'   \item{lines}{The [readLines()] output for this file.} # TODO document encoding
+#'   \item{lines}{The [readLines()] output for this file.}
 #' @export
 #' @md
 get_source_expressions <- function(filename, lines = NULL) {
diff --git a/R/utils.R b/R/utils.R
index 3c99c3661..3bfe33ee7 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -252,8 +252,10 @@ Linter <- function(fun, name = linter_auto_name()) { # nolint: object_name_linte
 
 read_lines <- function(file, encoding = settings$encoding, ...) {
   terminal_newline <- TRUE
+  con <- file(file, encoding = encoding)
+  on.exit(close(con))
   lines <- withCallingHandlers({
-    readLines(file, warn = TRUE, encoding = encoding, ...)
+    readLines(con, warn = TRUE, ...)
   },
   warning = function(w) {
     if (grepl("incomplete final line found on", w$message, fixed = TRUE)) {
diff --git a/tests/testthat/test-get_source_expressions.R b/tests/testthat/test-get_source_expressions.R
index e0ff739b6..2f9d7350d 100644
--- a/tests/testthat/test-get_source_expressions.R
+++ b/tests/testthat/test-get_source_expressions.R
@@ -91,4 +91,8 @@ test_that("Multi-byte character truncated by parser is ignored", {
   })
 })
 
-# TODO test encoding-specific behaviour
+test_that("Can read non UTF-8 file", {
+  file <- "dummy_projects/project/metropolis-hastings-rho.R"
+  read_settings(file)
+  expect_null(get_source_expressions(file)$error)
+})

From 235daf2ab057618b8beef011b592f5b6bbf4d871 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Wed, 30 Jun 2021 22:41:25 +0200
Subject: [PATCH 13/26] remove GPLv2 code, make path test more resilient

---
 man/get_source_expressions.Rd                 | 14 +++-
 .../testthat/dummy_packages/cp1252/R/cp1252.R |  4 ++
 .../cp1252/R/metropolis-hastings-rho.R        | 71 -------------------
 .../testthat/dummy_projects/project/cp1252.R  |  4 ++
 .../project/metropolis-hastings-rho.R         | 71 -------------------
 tests/testthat/test-get_source_expressions.R  |  2 +-
 tests/testthat/test-settings.R                |  4 +-
 7 files changed, 23 insertions(+), 147 deletions(-)
 create mode 100644 tests/testthat/dummy_packages/cp1252/R/cp1252.R
 delete mode 100644 tests/testthat/dummy_packages/cp1252/R/metropolis-hastings-rho.R
 create mode 100644 tests/testthat/dummy_projects/project/cp1252.R
 delete mode 100644 tests/testthat/dummy_projects/project/metropolis-hastings-rho.R

diff --git a/man/get_source_expressions.Rd b/man/get_source_expressions.Rd
index f32ed9f78..068808770 100644
--- a/man/get_source_expressions.Rd
+++ b/man/get_source_expressions.Rd
@@ -35,7 +35,7 @@ The final element of \code{expressions} is a list corresponding to the full file
 consisting of 6 elements:
 \itemize{
 \item{\code{filename} (\code{character})}
-\item{\code{file_lines} (\code{character}) the \code{\link[=readLines]{readLines()}} output for this file} # TODO document encoding
+\item{\code{file_lines} (\code{character}) the \code{\link[=readLines]{readLines()}} output for this file}
 \item{\code{content} (\code{character}) for .R files, the same as \code{file_lines};
 for .Rmd scripts, this is the extracted R source code (as text)}
 \item{\code{full_parsed_content} (\code{data.frame}) as given by
@@ -47,8 +47,18 @@ newline (as determined by \code{\link[=readLines]{readLines()}} producing a corr
 }
 }
 \item{error}{A \code{Lint} object describing any parsing error.}
-\item{lines}{The \code{\link[=readLines]{readLines()}} output for this file.} # TODO document encoding
+\item{lines}{The \code{\link[=readLines]{readLines()}} output for this file.}
 }
 \description{
 This object is given as input to each linter
 }
+\details{
+The file is read in using the \code{encoding} setting.
+This setting found by taking the first valid result from the following locations
+\enumerate{
+\item The \code{encoding} key from the usual lintr configuration settings.
+\item The \code{Encoding} field from a Package \code{DESCRIPTION} file in a parent directory.
+\item The \code{Encoding} field from an R Project \code{.Rproj} file in a parent directory.
+\item \code{"UTF-8"} as a fallback.
+}
+}
diff --git a/tests/testthat/dummy_packages/cp1252/R/cp1252.R b/tests/testthat/dummy_packages/cp1252/R/cp1252.R
new file mode 100644
index 000000000..a2ffbe425
--- /dev/null
+++ b/tests/testthat/dummy_packages/cp1252/R/cp1252.R
@@ -0,0 +1,4 @@
+# This file is encoded in Cp-1252
+
+# Comment containing non-ASCII ä
+ü <- 42
\ No newline at end of file
diff --git a/tests/testthat/dummy_packages/cp1252/R/metropolis-hastings-rho.R b/tests/testthat/dummy_packages/cp1252/R/metropolis-hastings-rho.R
deleted file mode 100644
index 456983eaf..000000000
--- a/tests/testthat/dummy_packages/cp1252/R/metropolis-hastings-rho.R
+++ /dev/null
@@ -1,71 +0,0 @@
-# Copied from the spatialprobit package https://raw.githubusercontent.com/cran/spatialprobit/master/R/metropolis-hastings-rho.R
-# This file is encoded in Cp-1252
-
-# @param beta parameter vektor (k x 1)
-# @param z imputed observed variables before truncation (n x 1)
-# @param W spatial weight matrix (n x n)
-# @param X design matrix (n x k)
-density_rho <- function(rho, beta, z, I_n, W, X, type=c("SAR","SEM")) {
-  A <- (I_n - rho * W)          # n x n
-  if (type == "SAR") {
-    S <- A %*% z - X %*% beta     # n x 1; Residuals of SAR
-  } else {
-    S <- as.numeric(A %*% (z - X %*% beta))   # n x 1; Residuals of SEM
-  }
-  as.numeric(det(A) * exp(-0.5 * t(S) %*% S))
-  #as.vector(exp(log(det(A)) - 0.5 * t(S) %*% S))
-}
-
-# Metropolis-Hastings-Chain with tuned acceptance rate, see LeSage (2009)
-#
-# proposal density is normal distribution
-# g(rho* | rho_t) = rho_t + c * N(0, 1)
-#
-# @param type "SEM" or "SAR",
-# @param n number of samples to draw
-# @param start.value start value for rho
-# @param c tuning parameter
-draw_rho_metropolis <- function(type="SEM", n, z, I_n, W, X, burn.in=100, start.value=0.2, c=1) {
-n <- n + burn.in
-u <- runif(n=n, 0, 1)  # samples for M-H-ratio
-s <- rnorm(n=n, 0, 1)  # realisations from proposal density
-rho_t <- numeric(n)    # vector for rho within the chain
-rho_t[1] <- start.value              # start value
-p_t      <- density_rho(rho_t[1], beta=beta, z=z, I_n=I_n, W=W, X=X, type=type)  # f(rho_t | beta, z)
-i    <- 2 # number of accepted proposals / length of chain
-acceptance_rate <- numeric(n) # running acceptance rate
-num_accepted <- 0
-while (i <= n) {
-
-  # create proposal rho_p from proposal density g(rho_p | rho_{t-1}) ~ N(rho_{t-1}, c^2)
-  rho_p <- rho_t[i-1] + c * s[i]   # proposal rho = letztes akzeptiertes Rho + normalverteilte Zufallsstreuung s
-
-  # Berechnung der Dichte f(rho_p) für das Proposal rho_p
-  p_p <- density_rho(rho_p, beta=beta, z=z, I_n=I_n, W=W, X=X, type=type)
-  # SW: Berechnung der Dichte ist teuer, daher besser für ein Grid vorher rechnen?
-  # Jain, Dichte p(rho|t,beta( hängt von z, beta ab. Aber zumindestens die Log-Determinante kann man vorher berechnen
-  # Berechnung Dichte f(rho_{t-1}) für letzten Wert der Chain
-  # p_t <- p(rho_t[i-1], beta=beta, z=z, W=W, X=X, type=type)  # SW: Kann ich die alte Dichte nicht merken, dann muss ich die nicht neu berechnen
-
-  # Wegen Symmetrie der Normalverteilung als Proposal-Dichte g(rho_p | rho_t) = g(rho_t | rho_p)
-  # vereinfacht sich das M-H-Ratio von [ f(rho_p) * g(rho_t | rho_p) ] / [ f(rho_t) * g(rho_p | rho_t) ]
-  # auf f(rho_p) / f(rho_t)!
-  # determine M-H-Ratio R(rho_p, rho_t) = min[ 1 ; p(rho_p) / p(rho_t) ]
-  # see LeSage (2009), eqn (5.27)
-  R <- min(1, p_p / p_t)
-  if (u[i] <= R) {    # see Givens/Hoeting, p.184, eqn (7.2)
-    # accept proposal
-    rho_t[i] <- rho_p
-    p_t <- p_p  # save density
-    num_accepted <- num_accepted + 1
-  } else {
-    # stay with the current value
-    rho_t[i] <- rho_t[i-1]
-  }
-  acceptance_rate[i] <- num_accepted / i
-  if (acceptance_rate[i] < 0.4) c <- c / 1.1   # Wenn Akzeptanzrate zu klein, dann verringere (?) Streuungsparameter "c"
-  if (acceptance_rate[i] > 0.6) c <- c * 1.1
-  i <- i + 1
-}
-return(list(rho_t=rho_t,acceptance_rate=acceptance_rate))
-}
diff --git a/tests/testthat/dummy_projects/project/cp1252.R b/tests/testthat/dummy_projects/project/cp1252.R
new file mode 100644
index 000000000..a2ffbe425
--- /dev/null
+++ b/tests/testthat/dummy_projects/project/cp1252.R
@@ -0,0 +1,4 @@
+# This file is encoded in Cp-1252
+
+# Comment containing non-ASCII ä
+ü <- 42
\ No newline at end of file
diff --git a/tests/testthat/dummy_projects/project/metropolis-hastings-rho.R b/tests/testthat/dummy_projects/project/metropolis-hastings-rho.R
deleted file mode 100644
index aba63b6ed..000000000
--- a/tests/testthat/dummy_projects/project/metropolis-hastings-rho.R
+++ /dev/null
@@ -1,71 +0,0 @@
-# Copied from https://raw.githubusercontent.com/cran/spatialprobit/master/R/metropolis-hastings-rho.R
-# This file is encoded in Cp-1252
-
-# @param beta parameter vektor (k x 1)
-# @param z imputed observed variables before truncation (n x 1)
-# @param W spatial weight matrix (n x n)
-# @param X design matrix (n x k)
-density_rho <- function(rho, beta, z, I_n, W, X, type=c("SAR","SEM")) {
-  A <- (I_n - rho * W)          # n x n
-  if (type == "SAR") {
-    S <- A %*% z - X %*% beta     # n x 1; Residuals of SAR
-  } else {
-    S <- as.numeric(A %*% (z - X %*% beta))   # n x 1; Residuals of SEM
-  }
-  as.numeric(det(A) * exp(-0.5 * t(S) %*% S))
-  #as.vector(exp(log(det(A)) - 0.5 * t(S) %*% S))
-}
-
-# Metropolis-Hastings-Chain with tuned acceptance rate, see LeSage (2009)
-# 
-# proposal density is normal distribution
-# g(rho* | rho_t) = rho_t + c * N(0, 1)
-#
-# @param type "SEM" or "SAR",
-# @param n number of samples to draw
-# @param start.value start value for rho
-# @param c tuning parameter
-draw_rho_metropolis <- function(type="SEM", n, z, I_n, W, X, burn.in=100, start.value=0.2, c=1) {
-n <- n + burn.in
-u <- runif(n=n, 0, 1)  # samples for M-H-ratio
-s <- rnorm(n=n, 0, 1)  # realisations from proposal density
-rho_t <- numeric(n)    # vector for rho within the chain
-rho_t[1] <- start.value              # start value
-p_t      <- density_rho(rho_t[1], beta=beta, z=z, I_n=I_n, W=W, X=X, type=type)  # f(rho_t | beta, z)
-i    <- 2 # number of accepted proposals / length of chain
-acceptance_rate <- numeric(n) # running acceptance rate
-num_accepted <- 0
-while (i <= n) {
-
-  # create proposal rho_p from proposal density g(rho_p | rho_{t-1}) ~ N(rho_{t-1}, c^2)
-  rho_p <- rho_t[i-1] + c * s[i]   # proposal rho = letztes akzeptiertes Rho + normalverteilte Zufallsstreuung s
-  
-  # Berechnung der Dichte f(rho_p) für das Proposal rho_p
-  p_p <- density_rho(rho_p, beta=beta, z=z, I_n=I_n, W=W, X=X, type=type)       
-  # SW: Berechnung der Dichte ist teuer, daher besser für ein Grid vorher rechnen?
-  # Jain, Dichte p(rho|t,beta( hängt von z, beta ab. Aber zumindestens die Log-Determinante kann man vorher berechnen
-  # Berechnung Dichte f(rho_{t-1}) für letzten Wert der Chain
-  # p_t <- p(rho_t[i-1], beta=beta, z=z, W=W, X=X, type=type)  # SW: Kann ich die alte Dichte nicht merken, dann muss ich die nicht neu berechnen
-
-  # Wegen Symmetrie der Normalverteilung als Proposal-Dichte g(rho_p | rho_t) = g(rho_t | rho_p)
-  # vereinfacht sich das M-H-Ratio von [ f(rho_p) * g(rho_t | rho_p) ] / [ f(rho_t) * g(rho_p | rho_t) ]
-  # auf f(rho_p) / f(rho_t)!
-  # determine M-H-Ratio R(rho_p, rho_t) = min[ 1 ; p(rho_p) / p(rho_t) ]
-  # see LeSage (2009), eqn (5.27)
-  R <- min(1, p_p / p_t)
-  if (u[i] <= R) {    # see Givens/Hoeting, p.184, eqn (7.2)
-    # accept proposal
-    rho_t[i] <- rho_p
-    p_t <- p_p  # save density
-    num_accepted <- num_accepted + 1
-  } else {
-    # stay with the current value
-    rho_t[i] <- rho_t[i-1]
-  }
-  acceptance_rate[i] <- num_accepted / i
-  if (acceptance_rate[i] < 0.4) c <- c / 1.1   # Wenn Akzeptanzrate zu klein, dann verringere (?) Streuungsparameter "c"
-  if (acceptance_rate[i] > 0.6) c <- c * 1.1
-  i <- i + 1
-}
-return(list(rho_t=rho_t,acceptance_rate=acceptance_rate)) 
-}
\ No newline at end of file
diff --git a/tests/testthat/test-get_source_expressions.R b/tests/testthat/test-get_source_expressions.R
index 2f9d7350d..bce962b4b 100644
--- a/tests/testthat/test-get_source_expressions.R
+++ b/tests/testthat/test-get_source_expressions.R
@@ -92,7 +92,7 @@ test_that("Multi-byte character truncated by parser is ignored", {
 })
 
 test_that("Can read non UTF-8 file", {
-  file <- "dummy_projects/project/metropolis-hastings-rho.R"
+  file <- "dummy_projects/project/cp1252.R"
   read_settings(file)
   expect_null(get_source_expressions(file)$error)
 })
diff --git a/tests/testthat/test-settings.R b/tests/testthat/test-settings.R
index bd6ab51bc..5a40e87d7 100644
--- a/tests/testthat/test-settings.R
+++ b/tests/testthat/test-settings.R
@@ -133,11 +133,11 @@ test_that("it has a smart default for encodings", {
   pkg_file <- file.path("dummy_packages", "cp1252", "R", "metropolis-hastings-rho.R")
 
   expect_equal(
-    find_rproj(proj_file),
+    normalizePath(find_rproj(proj_file), winslash = "/"),
     normalizePath(file.path("dummy_projects", "project", "project.Rproj"), winslash = "/")
   )
   expect_equal(
-    find_package(pkg_file),
+    normalizePath(find_package(pkg_file), winslash = "/"),
     normalizePath(file.path("dummy_packages", "cp1252"), winslash = "/")
   )
 

From f1a84a659c00a6c5d73bbe5ab18187b34e0bcf8b Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Wed, 30 Jun 2021 22:57:41 +0200
Subject: [PATCH 14/26] remove fix_column_numbers() as it's not needed when
 srcfile() gets the correct encoding.

---
 R/get_source_expressions.R | 43 +-------------------------------------
 1 file changed, 1 insertion(+), 42 deletions(-)

diff --git a/R/get_source_expressions.R b/R/get_source_expressions.R
index 04effb687..e9fbffc71 100644
--- a/R/get_source_expressions.R
+++ b/R/get_source_expressions.R
@@ -295,7 +295,7 @@ get_source_file <- function(source_file, error = identity) {
     return() # parsed_content is unreliable if encoding is invalid
   }
 
-  fix_eq_assigns(fix_column_numbers(fix_tab_indentations(source_file)))
+  fix_eq_assigns(fix_tab_indentations(source_file))
 }
 
 find_line_fun <- function(content) {
@@ -331,47 +331,6 @@ find_column_fun <- function(content) {
   }
 }
 
-# Adjust the columns that getParseData reports from bytes to characters.
-fix_column_numbers <- function(content) {
-  if (is.null(content)) {
-    return(NULL)
-  }
-
-  # Temporary: Apparently this is no longer necessary.
-  return(content)
-
-  text_lengths <- nchar(content$text[content$terminal], "chars")
-  byte_lengths <- nchar(content$text[content$terminal], "bytes")
-  differences <- byte_lengths - text_lengths
-
-  to_change <- which(content$terminal[differences > 0L])
-
-  adjusted_col1 <- content$col1
-  adjusted_col2 <- content$col2
-
-  for (i in to_change) {
-    needs_adjustment <- which(content$line1 == content$line1[i] & content$col1 >= content$col1[i])
-
-    for (j in needs_adjustment) {
-      adjusted_col1[j] <- content$col1[j] -
-        sum(differences[
-              content$line1[content$terminal] == content$line1[j] &
-                content$col1[content$terminal] < content$col1[j]
-            ])
-
-      adjusted_col2[j] <- content$col2[j] -
-        sum(differences[
-              content$line1[content$terminal] == content$line1[j] &
-                content$col2[content$terminal] <= content$col2[j]
-            ])
-    }
-  }
-  content$col1 <- adjusted_col1
-  content$col2 <- adjusted_col2
-  content
-}
-
-
 # Fix column numbers when there are tabs
 # getParseData() counts 1 tab as a variable number of spaces instead of one:
 # https://github.com/wch/r-source/blame/e7401b68ab0e032fce3e376aaca9a5431619b2b4/src/main/gram.y#L512

From 7240edac895abb3d22bba857693a28f18157d231 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Wed, 30 Jun 2021 23:32:28 +0200
Subject: [PATCH 15/26] write files with proper encoding

---
 R/expect_lint.R                              | 6 ++++--
 R/lint.R                                     | 4 +++-
 tests/testthat/test-get_source_expressions.R | 6 ++++--
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/R/expect_lint.R b/R/expect_lint.R
index 3241600cd..5b9c99a10 100644
--- a/R/expect_lint.R
+++ b/R/expect_lint.R
@@ -45,8 +45,10 @@ expect_lint <- function(content, checks, ..., file = NULL, language = "en") {
 
   if (is.null(file)) {
     file <- tempfile()
-    on.exit(unlink(file), add = TRUE)
-    writeLines(content, con = file, sep = "\n")
+    con <- base::file(file, encoding = "UTF-8")
+    on.exit({ unlink(file) }, add = TRUE)
+    writeLines(content, con = con, sep = "\n")
+    close(con)
   }
 
   lints <- lint(file, ...)
diff --git a/R/lint.R b/R/lint.R
index 513ace6eb..4fd2673d1 100644
--- a/R/lint.R
+++ b/R/lint.R
@@ -56,8 +56,10 @@ lint <- function(filename, linters = NULL, cache = FALSE, ..., parse_settings =
 
   if (inline_data && no_filename) {
     filename <- tempfile()
+    con <- file(filename, open = "w", encoding = settings$encoding)
     on.exit(unlink(filename), add = TRUE)
-    writeLines(text = text, con = filename, sep = "\n")
+    writeLines(text = text, con = con, sep = "\n")
+    close(con)
   }
 
   lines <- if (is.null(text)) {
diff --git a/tests/testthat/test-get_source_expressions.R b/tests/testthat/test-get_source_expressions.R
index bce962b4b..3e5b1ad77 100644
--- a/tests/testthat/test-get_source_expressions.R
+++ b/tests/testthat/test-get_source_expressions.R
@@ -1,7 +1,9 @@
 with_content_to_parse <- function(content, code) {
   f <- tempfile()
-  on.exit(unlink(f))
-  writeLines(content, f)
+  con <- file(f, open = "w", encoding = "UTF-8")
+  on.exit({ unlink(f) })
+  writeLines(content, con)
+  close(con)
   source_expressions <- get_source_expressions(f)
   content_env <- new.env()
   content_env$pc <- lapply(source_expressions[["expressions"]], `[[`, "parsed_content")

From dcdb01c75ee85644002c59cbcb46024c8efb3cd6 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Thu, 1 Jul 2021 09:54:53 +0200
Subject: [PATCH 16/26] use iconv() instead of connections to get more control
 over messages

---
 R/cache.R                                      |  4 +---
 R/get_source_expressions.R                     | 18 +++++++++++++++++-
 R/utils.R                                      |  7 ++++---
 .../dummy_projects/project/cp1252_parseable.R  |  4 ++++
 tests/testthat/test-get_source_expressions.R   | 16 ++++++++++++++++
 5 files changed, 42 insertions(+), 7 deletions(-)
 create mode 100644 tests/testthat/dummy_projects/project/cp1252_parseable.R

diff --git a/R/cache.R b/R/cache.R
index cf09b0cb5..7a412be9e 100644
--- a/R/cache.R
+++ b/R/cache.R
@@ -119,9 +119,7 @@ digest_content <- function(linters, obj) {
     list(linters, obj$content, is.null(obj$parsed_content))
   } else {
     # assume a filename
-    con <- file(obj, encoding = settings$encoding)
-    on.exit(close(con))
-    list(linters, readLines(con))
+    list(linters, readLines(obj))
   }
   digest::digest(content, algo = "sha1")
 }
diff --git a/R/get_source_expressions.R b/R/get_source_expressions.R
index e9fbffc71..7e0a11696 100644
--- a/R/get_source_expressions.R
+++ b/R/get_source_expressions.R
@@ -87,7 +87,23 @@ get_source_expressions <- function(filename, lines = NULL) {
     # an error that does not use R_ParseErrorMsg
     if (is.na(message_info$line)) {
 
-      if (grepl("invalid multibyte string, element", e$message, fixed = TRUE)) {
+      if (grepl("invalid multibyte character in parser at line", e$message, fixed = TRUE)) {
+        l <- as.integer(re_matches(
+          e$message,
+          rex("invalid multibyte character in parser at line ", capture(name = "line", digits))
+        )$line)
+        # Invalid encoding in source code
+        return(
+          Lint(
+            filename = source_file$filename,
+            line_number = l,
+            column_number = 1L,
+            type = "error",
+            message = "Invalid multibyte character in parser. Is the encoding correct?",
+            line = source_file$lines[[l]]
+          )
+        )
+      } else if (grepl("invalid multibyte string, element", e$message, fixed = TRUE)) {
         # Invalid encoding, will break even re_matches() below, so we need to handle this first.
         return(
           Lint(
diff --git a/R/utils.R b/R/utils.R
index 3bfe33ee7..eaa6da636 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -252,10 +252,8 @@ Linter <- function(fun, name = linter_auto_name()) { # nolint: object_name_linte
 
 read_lines <- function(file, encoding = settings$encoding, ...) {
   terminal_newline <- TRUE
-  con <- file(file, encoding = encoding)
-  on.exit(close(con))
   lines <- withCallingHandlers({
-    readLines(con, warn = TRUE, ...)
+    readLines(file, warn = TRUE, ...)
   },
   warning = function(w) {
     if (grepl("incomplete final line found on", w$message, fixed = TRUE)) {
@@ -263,6 +261,9 @@ read_lines <- function(file, encoding = settings$encoding, ...) {
       invokeRestart("muffleWarning")
     }
   })
+  lines_conv <- iconv(lines, from = encoding, to = "UTF-8")
+  lines[!is.na(lines_conv)] <- lines_conv[!is.na(lines_conv)]
+  Encoding(lines) <- "UTF-8"
   attr(lines, "terminal_newline") <- terminal_newline
   lines
 }
diff --git a/tests/testthat/dummy_projects/project/cp1252_parseable.R b/tests/testthat/dummy_projects/project/cp1252_parseable.R
new file mode 100644
index 000000000..6027ee98b
--- /dev/null
+++ b/tests/testthat/dummy_projects/project/cp1252_parseable.R
@@ -0,0 +1,4 @@
+# This file is encoded in Cp-1252
+
+# Comment containing non-ASCII ä
+a <- 42
\ No newline at end of file
diff --git a/tests/testthat/test-get_source_expressions.R b/tests/testthat/test-get_source_expressions.R
index 3e5b1ad77..9e30447f6 100644
--- a/tests/testthat/test-get_source_expressions.R
+++ b/tests/testthat/test-get_source_expressions.R
@@ -98,3 +98,19 @@ test_that("Can read non UTF-8 file", {
   read_settings(file)
   expect_null(get_source_expressions(file)$error)
 })
+
+test_that("Warns if encoding is misspecified", {
+  file <- "dummy_projects/project/cp1252.R"
+  read_settings(NULL)
+  the_lint <- get_source_expressions(file)$error
+  expect_s3_class(the_lint, "lint")
+  expect_equal(the_lint$message, "Invalid multibyte character in parser. Is the encoding correct?")
+  expect_equal(the_lint$line_number, 4L)
+
+  file <- "dummy_projects/project/cp1252_parseable.R"
+  read_settings(NULL)
+  the_lint <- get_source_expressions(file)$error
+  expect_s3_class(the_lint, "lint")
+  expect_equal(the_lint$message, "Invalid multibyte string. Is the encoding correct?")
+  expect_equal(the_lint$line_number, 1L)
+})

From b97858d26ffcce5e5f5d51121d15f67e4dc0ef22 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Thu, 1 Jul 2021 10:15:34 +0200
Subject: [PATCH 17/26] update .lintr_new make lint_package test for warning
 more resilient (it failed the codecov build)

---
 .lintr_new                         |  1 +
 tests/testthat/test-lint_package.R | 19 +++++++++----------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/.lintr_new b/.lintr_new
index a8be72455..5d6f47b90 100644
--- a/.lintr_new
+++ b/.lintr_new
@@ -3,6 +3,7 @@ linters: with_defaults(
   )
 exclusions: list(
   "tests/testthat/dummy_packages",
+  "tests/testthat/dummy_projects",
   "tests/testthat/knitr_formats",
   "tests/testthat/knitr_malformed",
   "inst",
diff --git a/tests/testthat/test-lint_package.R b/tests/testthat/test-lint_package.R
index 79db227ab..cf7ed09f1 100644
--- a/tests/testthat/test-lint_package.R
+++ b/tests/testthat/test-lint_package.R
@@ -113,20 +113,19 @@ test_that(
 })
 
 test_that("lint_package returns early if no package is found", {
-  skip_if(
-    !is.null(suppressWarnings(lint_package(dirname(tempdir())))),
-    "temp directory matches a package structure"
-  )
-  expect_warning(l <- lint_package(tempdir()), "Didn't find any R package", fixed = TRUE)
+
+  temp_pkg <- tempfile("dir")
+  dir.create(temp_pkg)
+  on.exit(unlink(temp_pkg, recursive = TRUE))
+
+  expect_warning(l <- lint_package(temp_pkg), "Didn't find any R package", fixed = TRUE)
   expect_null(l)
 
-  skip_if(
-    !is.null(suppressWarnings(lint_package("."))),
-    "package test directory matches a package structure"
-  )
   # ignore a folder named DESCRIPTION, #702
+  file.copy(file.path("dummy_packages", "desc_dir_pkg"), temp_pkg, recursive = TRUE)
+
   expect_warning(
-    lint_package(file.path("dummy_packages", "desc_dir_pkg", "DESCRIPTION", "R")),
+    lint_package(file.path(temp_pkg, "desc_dir_pkg", "DESCRIPTION", "R")),
     "Didn't find any R package", fixed = TRUE
   )
 })

From 0454a4eafa44bbfc64322f2d5f97517c0187b1b5 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Thu, 1 Jul 2021 10:51:01 +0200
Subject: [PATCH 18/26] fix test for windows, add test for parse_exclusions

---
 tests/testthat/test-cache.R                  |  2 --
 tests/testthat/test-exclusions.R             |  6 +++++-
 tests/testthat/test-get_source_expressions.R | 10 +++++++++-
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/tests/testthat/test-cache.R b/tests/testthat/test-cache.R
index 941e7a8cd..9d4693a01 100644
--- a/tests/testthat/test-cache.R
+++ b/tests/testthat/test-cache.R
@@ -480,5 +480,3 @@ test_that("cache = TRUE works with nolint", {
   writeLines("1+1 # nolint\n", file)
   expect_length(lint(file, linters, cache = TRUE), 0)
 })
-
-# TODO test encoding-specific behaviour
diff --git a/tests/testthat/test-exclusions.R b/tests/testthat/test-exclusions.R
index f650feca8..394927eb7 100644
--- a/tests/testthat/test-exclusions.R
+++ b/tests/testthat/test-exclusions.R
@@ -26,6 +26,10 @@ test_that("it excludes properly", {
   }
 })
 
-# TODO test encoding-specific behaviour
+test_that("it doesn't fail when encountering misspecified encodings", {
+  read_settings(NULL)
+
+  expect_length(parse_exclusions("dummy_projects/project/cp1252.R"), 0L)
+})
 
 options(old_ops)
diff --git a/tests/testthat/test-get_source_expressions.R b/tests/testthat/test-get_source_expressions.R
index 9e30447f6..e89b292ba 100644
--- a/tests/testthat/test-get_source_expressions.R
+++ b/tests/testthat/test-get_source_expressions.R
@@ -104,7 +104,15 @@ test_that("Warns if encoding is misspecified", {
   read_settings(NULL)
   the_lint <- get_source_expressions(file)$error
   expect_s3_class(the_lint, "lint")
-  expect_equal(the_lint$message, "Invalid multibyte character in parser. Is the encoding correct?")
+
+  msg <- "Invalid multibyte character in parser. Is the encoding correct?"
+  if (.Platform$OS.type == "windows") {
+    # Windows parser throws a different error message because the source code is converted to native encoding
+    # This results in line 4 becoming <fc> <- 42 before the parser sees it.
+    msg <- "unexpected '<'"
+  }
+
+  expect_equal(the_lint$message, msg)
   expect_equal(the_lint$line_number, 4L)
 
   file <- "dummy_projects/project/cp1252_parseable.R"

From c5d5ceafdc9a3d38c185290174b93d109e1158a9 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Thu, 1 Jul 2021 11:23:12 +0200
Subject: [PATCH 19/26] update documentation, add NEWS bullet.

---
 NEWS.md                   | 2 ++
 README.md                 | 3 +++
 vignettes/using_lintr.Rmd | 3 +++
 3 files changed, 8 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 5cc18a17e..a1f3b8bd1 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -65,6 +65,8 @@
 * `lint()` now has a new optional argument `text` for supplying a string or lines directly, e.g. if the file is already in memory or linting is being done ad hoc. (#503, @renkun-ken)
 * New `pipe_call_linter()` enforces that all steps of `magrittr` pipelines use explicit calls instead of symbols, e.g. `x %>% mean()` instead of `x %>% mean` (@michaelchirico)
 * `get_source_expressions()` no longer fails if `getParseData()` returns a truncated (invalid) Unicode character as parsed text (#815, #816, @leogama)
+* lintr now supports non-system character Encodings. Auto-detects the correct encoding from .Rproj or DESCRIPTION 
+  files in your project. Override the default in the `encoding` setting of lintr. (#752, #782, @AshesITR)
 
 # lintr 2.0.1
 
diff --git a/README.md b/README.md
index 5eb157753..cc9b1665e 100644
--- a/README.md
+++ b/README.md
@@ -110,6 +110,7 @@ The config file (default file name: `.lintr`) is in [Debian Control Field Format
 - `exclude` - a regex pattern for lines to exclude from linting.  Default is "# nolint"
 - `exclude_start` - a regex pattern to start exclusion range. Default is "# nolint start"
 - `exclude_end` - a regex pattern to end exclusion range. Default is "# nolint end"
+- `encoding` - the encoding used for source files. Default inferred from .Rproj or DESCRIPTION files, fallback to UTF-8
 
 
 ### .lintr File Example
@@ -120,6 +121,7 @@ Below is an example .lintr file that uses:
 - Excludes a couple of files
 - Disables a specific linter, and; 
 - Sets different default exclude regexes
+- Specifies the file encoding to be ISO-8859-1 (Latin 1)
 
 ```
 linters: with_defaults(
@@ -130,6 +132,7 @@ exclusions: list("inst/doc/creating_linters.R" = 1, "inst/example/bad.R", "tests
 exclude: "# Exclude Linting"
 exclude_start: "# Begin Exclude Linting"
 exclude_end: "# End Exclude Linting"
+encoding: "ISO-8859-1"
 ```
 
 With the following command, you can create a configuration file for `lintr` that ignores all linters that show at least one error:
diff --git a/vignettes/using_lintr.Rmd b/vignettes/using_lintr.Rmd
index c731c9001..b65ca8a1d 100644
--- a/vignettes/using_lintr.Rmd
+++ b/vignettes/using_lintr.Rmd
@@ -115,6 +115,9 @@ defaults_table <- data.frame(
 knitr::kable(defaults_table)
 ```
 
+Note that the default `encoding` setting depends on the file to be linted.
+If an Encoding is found in a `.Rproj` file or a `DESCRIPTION` file, that encoding overrides the default of UTF-8.
+
 #### Customizing active linters
 
 If you only want to customize some linters, you can use the helper function `with_defaults()`, which will keep all

From 75fc2d844d63097f727ad79ec8449cb8ff3fc018 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Thu, 1 Jul 2021 11:45:57 +0200
Subject: [PATCH 20/26] fix lints, reduce cyclocomp of find_default_encoding()

---
 R/expect_lint.R                               |  2 +-
 R/settings.R                                  | 47 +++++++++----------
 .../testthat/dummy_packages/cp1252/R/cp1252.R |  2 +-
 .../testthat/dummy_projects/project/cp1252.R  |  2 +-
 .../dummy_projects/project/cp1252_parseable.R |  2 +-
 tests/testthat/test-get_source_expressions.R  |  2 +-
 6 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/R/expect_lint.R b/R/expect_lint.R
index 5b9c99a10..b362c0eca 100644
--- a/R/expect_lint.R
+++ b/R/expect_lint.R
@@ -46,7 +46,7 @@ expect_lint <- function(content, checks, ..., file = NULL, language = "en") {
   if (is.null(file)) {
     file <- tempfile()
     con <- base::file(file, encoding = "UTF-8")
-    on.exit({ unlink(file) }, add = TRUE)
+    on.exit(unlink(file), add = TRUE)
     writeLines(content, con = con, sep = "\n")
     close(con)
   }
diff --git a/R/settings.R b/R/settings.R
index 40ea790bb..d71bfcd01 100644
--- a/R/settings.R
+++ b/R/settings.R
@@ -112,39 +112,36 @@ find_default_encoding <- function(filename) {
 
   pkg_path <- find_package(filename)
   rproj_file <- find_rproj(filename)
+  pkg_enc <- get_encoding_from_dcf(file.path(pkg_path, "DESCRIPTION"))
+  rproj_enc <- get_encoding_from_dcf(rproj_file)
 
   if (!is.null(rproj_file) && !is.null(pkg_path) && startsWith(rproj_file, pkg_path)) {
     # Check precedence via directory hierarchy.
     # Both paths are normalized so checking if rproj_file is within pkg_path is sufficient.
-    # Force reading from Rproj file
-    pkg_path <- NULL
+    # Let Rproj file take precedence
+    return(rproj_enc %||% pkg_enc)
+  } else {
+    # Let DESCRIPTION file take precedence if .Rproj file is further up the directory hierarchy
+    return(pkg_enc %||% rproj_enc)
   }
 
-  if (!is.null(pkg_path)) {
-    # Get Encoding from DESCRIPTION
-    dcf <- tryCatch(
-      read.dcf(file.path(pkg_path, "DESCRIPTION")),
-      error = function(e) NULL
-    )
-    if (!is.null(dcf) && nrow(dcf) >= 1L && "Encoding" %in% colnames(dcf)) {
-      return(unname(dcf[1L, "Encoding"]))
-    }
-  }
+  NULL
+}
 
-  if (!is.null(rproj_file)) {
-    # Get Encoding from .Rproj
-    dcf <- tryCatch(
-      read.dcf(rproj_file),
-      error = function(e) NULL,
-      warning = function(e) NULL
-    )
+get_encoding_from_dcf <- function(file) {
+  if (is.null(file)) return(NULL)
 
-    if (!is.null(dcf) && nrow(dcf) >= 1L && "Encoding" %in% colnames(dcf)) {
-      encodings <- dcf[, "Encoding"]
-      encodings <- encodings[!is.na(encodings)]
-      if (length(encodings) > 0L) {
-        return(encodings[1L])
-      }
+  dcf <- tryCatch(
+    read.dcf(file),
+    error = function(e) NULL,
+    warning = function(e) NULL
+  )
+
+  if (!is.null(dcf) && nrow(dcf) >= 1L && "Encoding" %in% colnames(dcf)) {
+    encodings <- dcf[, "Encoding"]
+    encodings <- encodings[!is.na(encodings)]
+    if (length(encodings) > 0L) {
+      return(encodings[1L])
     }
   }
 
diff --git a/tests/testthat/dummy_packages/cp1252/R/cp1252.R b/tests/testthat/dummy_packages/cp1252/R/cp1252.R
index a2ffbe425..968f758cd 100644
--- a/tests/testthat/dummy_packages/cp1252/R/cp1252.R
+++ b/tests/testthat/dummy_packages/cp1252/R/cp1252.R
@@ -1,4 +1,4 @@
 # This file is encoded in Cp-1252
 
 # Comment containing non-ASCII ä
-ü <- 42
\ No newline at end of file
+ü <- 42
diff --git a/tests/testthat/dummy_projects/project/cp1252.R b/tests/testthat/dummy_projects/project/cp1252.R
index a2ffbe425..968f758cd 100644
--- a/tests/testthat/dummy_projects/project/cp1252.R
+++ b/tests/testthat/dummy_projects/project/cp1252.R
@@ -1,4 +1,4 @@
 # This file is encoded in Cp-1252
 
 # Comment containing non-ASCII ä
-ü <- 42
\ No newline at end of file
+ü <- 42
diff --git a/tests/testthat/dummy_projects/project/cp1252_parseable.R b/tests/testthat/dummy_projects/project/cp1252_parseable.R
index 6027ee98b..b9072a050 100644
--- a/tests/testthat/dummy_projects/project/cp1252_parseable.R
+++ b/tests/testthat/dummy_projects/project/cp1252_parseable.R
@@ -1,4 +1,4 @@
 # This file is encoded in Cp-1252
 
 # Comment containing non-ASCII ä
-a <- 42
\ No newline at end of file
+a <- 42
diff --git a/tests/testthat/test-get_source_expressions.R b/tests/testthat/test-get_source_expressions.R
index e89b292ba..f233a3ef2 100644
--- a/tests/testthat/test-get_source_expressions.R
+++ b/tests/testthat/test-get_source_expressions.R
@@ -1,7 +1,7 @@
 with_content_to_parse <- function(content, code) {
   f <- tempfile()
   con <- file(f, open = "w", encoding = "UTF-8")
-  on.exit({ unlink(f) })
+  on.exit(unlink(f))
   writeLines(content, con)
   close(con)
   source_expressions <- get_source_expressions(f)

From 7b4a9ae92267947f5762c464c8bf93d9b4009788 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Fri, 2 Jul 2021 18:06:26 +0200
Subject: [PATCH 21/26] return expressions as an empty list() instead of NULL
 if bad encodings appear.

---
 R/get_source_expressions.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/get_source_expressions.R b/R/get_source_expressions.R
index 7e0a11696..6c0548bb7 100644
--- a/R/get_source_expressions.R
+++ b/R/get_source_expressions.R
@@ -227,7 +227,7 @@ get_source_expressions <- function(filename, lines = NULL) {
 
   if (inherits(e, "lint") && !nzchar(e$line)) {
     # Don't create expression list if it's unreliable (invalid encoding or unhandled parse error)
-    expressions <- NULL
+    expressions <- list()
   } else {
     expressions <- lapply(
       X = top_level_expressions(parsed_content),

From f8f5e7f6f03a646e26b34f223bb14e0a64538cc7 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Fri, 2 Jul 2021 18:09:50 +0200
Subject: [PATCH 22/26] more efficient find_rproj()

---
 R/lint.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/lint.R b/R/lint.R
index 4fd2673d1..a36335d4d 100644
--- a/R/lint.R
+++ b/R/lint.R
@@ -378,21 +378,21 @@ find_package <- function(path) {
   path
 }
 
-has_rproj <- function(path) {
-  length(list.files(path = path, pattern = "\\.Rproj$")) > 0L
+find_rproj_at <- function(path) {
+  head(list.files(path = path, pattern = "\\.Rproj$"), 1L)
 }
 
 find_rproj <- function(path) {
   path <- normalizePath(path, mustWork = FALSE)
 
-  while (!has_rproj(path)) {
+  while (length(res <- find_rproj_at(path)) == 0L) {
     path <- dirname(path)
     if (is_root(path)) {
       return(NULL)
     }
   }
 
-  list.files(path = path, pattern = "\\.Rproj$", full.names = TRUE)[1L]
+  res
 }
 
 is_root <- function(path) {

From 88cb2fbba8cd642dae5050dc2f316e1e29132408 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Fri, 2 Jul 2021 18:12:43 +0200
Subject: [PATCH 23/26] simplify get_encoding_from_dcf()

---
 R/settings.R | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/R/settings.R b/R/settings.R
index d71bfcd01..62190d130 100644
--- a/R/settings.R
+++ b/R/settings.R
@@ -131,14 +131,13 @@ find_default_encoding <- function(filename) {
 get_encoding_from_dcf <- function(file) {
   if (is.null(file)) return(NULL)
 
-  dcf <- tryCatch(
-    read.dcf(file),
+  encodings <- tryCatch(
+    drop(read.dcf(file, "Encoding")),
     error = function(e) NULL,
     warning = function(e) NULL
   )
 
-  if (!is.null(dcf) && nrow(dcf) >= 1L && "Encoding" %in% colnames(dcf)) {
-    encodings <- dcf[, "Encoding"]
+  if (!is.null(encodings)) {
     encodings <- encodings[!is.na(encodings)]
     if (length(encodings) > 0L) {
       return(encodings[1L])

From ba7714509f7a6c20a70ef38831fc342cb7af844f Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Fri, 2 Jul 2021 18:22:03 +0200
Subject: [PATCH 24/26] full.names = TRUE D'oh

---
 R/lint.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/lint.R b/R/lint.R
index a36335d4d..bdd86bb23 100644
--- a/R/lint.R
+++ b/R/lint.R
@@ -379,7 +379,7 @@ find_package <- function(path) {
 }
 
 find_rproj_at <- function(path) {
-  head(list.files(path = path, pattern = "\\.Rproj$"), 1L)
+  head(list.files(path = path, pattern = "\\.Rproj$", full.names = TRUE), 1L)
 }
 
 find_rproj <- function(path) {

From 761d6259dd5fcbc42854d35a86c36e706bc3a9ba Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Fri, 2 Jul 2021 18:42:39 +0200
Subject: [PATCH 25/26] remove pointless NULL check and make sure result is not
 named

---
 R/settings.R | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/R/settings.R b/R/settings.R
index 62190d130..2547916df 100644
--- a/R/settings.R
+++ b/R/settings.R
@@ -132,16 +132,14 @@ get_encoding_from_dcf <- function(file) {
   if (is.null(file)) return(NULL)
 
   encodings <- tryCatch(
-    drop(read.dcf(file, "Encoding")),
+    unname(drop(read.dcf(file, "Encoding"))),
     error = function(e) NULL,
     warning = function(e) NULL
   )
 
-  if (!is.null(encodings)) {
-    encodings <- encodings[!is.na(encodings)]
-    if (length(encodings) > 0L) {
-      return(encodings[1L])
-    }
+  encodings <- encodings[!is.na(encodings)]
+  if (length(encodings) > 0L) {
+    return(encodings[1L])
   }
 
   NULL

From 5c8f4bf56ec0be47ef3a45c84efccee442bea6c7 Mon Sep 17 00:00:00 2001
From: Alexander Rosenstock <alexander.rosenstock@web.de>
Date: Fri, 2 Jul 2021 18:48:48 +0200
Subject: [PATCH 26/26] remove unreachable code

---
 R/settings.R | 2 --
 1 file changed, 2 deletions(-)

diff --git a/R/settings.R b/R/settings.R
index 2547916df..fdc4863ae 100644
--- a/R/settings.R
+++ b/R/settings.R
@@ -124,8 +124,6 @@ find_default_encoding <- function(filename) {
     # Let DESCRIPTION file take precedence if .Rproj file is further up the directory hierarchy
     return(pkg_enc %||% rproj_enc)
   }
-
-  NULL
 }
 
 get_encoding_from_dcf <- function(file) {