From 2b0700cbefb3007fe035474109c4cf18352f86e0 Mon Sep 17 00:00:00 2001
From: Dominique Luna <dluna132@gmail.com>
Date: Sat, 21 Nov 2020 19:05:59 -0500
Subject: [PATCH] Count number of bytes in unicode whitespace (#334)

* Count number of bytes in unicode whitespace

fixes #332

* revert change from typo
---
 Project.toml     |  2 +-
 src/document.jl  |  2 ++
 test/document.jl | 37 ++++++++++++++++++++++++++++---------
 test/issues.jl   | 11 +++++++++++
 4 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/Project.toml b/Project.toml
index 0a94fc74c..c9a480bc0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "JuliaFormatter"
 uuid = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
 authors = ["Dominique Luna <dluna132@gmail.com>"]
-version = "0.10.8"
+version = "0.10.9"
 
 [deps]
 CSTParser = "00ebfdb7-1f24-5e51-bd34-a7502290713f"
diff --git a/src/document.jl b/src/document.jl
index 40bdd8b08..843b3578e 100644
--- a/src/document.jl
+++ b/src/document.jl
@@ -149,6 +149,8 @@ function Document(text::AbstractString)
 
         if t.kind === Tokens.COMMENT
             goffset += (t.endbyte - t.startbyte + 1)
+        elseif t.kind === Tokens.WHITESPACE
+            goffset += (t.endbyte - t.startbyte + 1)
         else
             goffset += length(Tokenize.untokenize(t))
         end
diff --git a/test/document.jl b/test/document.jl
index 8f8730180..936b81b1e 100644
--- a/test/document.jl
+++ b/test/document.jl
@@ -1,12 +1,31 @@
 @testset "Document" begin
-    s = """
-    \"""
-    𝔽𝔽
+    @testset "count unicode literals in bytes" begin
+        s = """
+        \"""
+        𝔽𝔽
 
-    \"""
-    struct A end
-    """
-    d = JuliaFormatter.Document(s)
-    ranges = Dict(1 => 1:4, 2 => 5:7, 3 => 8:8, 4 => 9:12, 5 => 13:25, 6 => 26:25)
-    @test ranges == d.line_to_range
+        \"""
+        struct A end
+        """
+        d = JuliaFormatter.Document(s)
+        ranges = Dict(1 => 1:4, 2 => 5:7, 3 => 8:8, 4 => 9:12, 5 => 13:25, 6 => 26:25)
+        @test ranges == d.line_to_range
+    end
+
+    @testset "count unicode whitespace in bytes" begin
+        s0 = """a = b || c ;
+               f("A")"""
+        d = JuliaFormatter.Document(s0)
+        ranges = Dict(1 => 1:13, 2 => 14:19)
+        @test ranges == d.line_to_range
+
+        # this string has a non-breaking space (U+00A0) after 'c';
+        # it is encoded as 2 bytes in UTF-8, so byte offsets after it
+        # are one higher than in the ASCII case above
+        s = """a = b || c ;
+               f("A")"""
+        d = JuliaFormatter.Document(s)
+        ranges = Dict(1 => 1:14, 2 => 15:20)
+        @test ranges == d.line_to_range
+    end
 end
diff --git a/test/issues.jl b/test/issues.jl
index ac54c696a..4d6f2eeb1 100644
--- a/test/issues.jl
+++ b/test/issues.jl
@@ -621,4 +621,15 @@
         """
         @test format_text(str, align_assignment = true) == str
     end
+
+    @testset "issue 332" begin
+        # str_ has a non-breaking space (U+00A0) after 'c', which is
+        # 2 bytes in UTF-8; formatting must still succeed and drop
+        # the whitespace before ';' (regression test for #332)
+        str_ = """a = b || c ;
+               f("A")"""
+        str = """a = b || c;
+               f("A")"""
+        @test format_text(str_) == str
+    end
 end