diff --git a/NEWS.md b/NEWS.md index ab7192af1fb87b..2e10b875e9af5a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,6 +16,9 @@ New language features in `[A; B]` has always described concatenating along the first dimension (vertically), now two semicolons `[A;; B]` do so in the second dimension (horizontally), three semicolons `;;;` in the third, and so on. ([#33697]) +* A backslash (`\`) before a newline inside a string literal now removes the newline while also + respecting indentation. This can be used to split up long strings without newlines into multiple + lines of code. ([#40753]) Language changes ---------------- @@ -114,6 +117,8 @@ Standard library changes * `@lock` is now exported from Base ([#39588]). * The experimental function `Base.catch_stack()` has been renamed to `current_exceptions()`, exported from Base and given a more specific return type ([#29901]) * Some degree trigonometric functions, `sind`, `cosd`, `tand`, `asind`, `acosd`, `asecd`, `acscd`, `acotd`, `atand` now accept a square matrix ([#39758]). +* A backslash before a newline in command literals now always removes the newline, similar to standard string + literals, whereas the result was not well-defined before. 
([#40753]) #### Package Manager diff --git a/base/shell.jl b/base/shell.jl index 99866c8010b0f5..e94ec466e2c05b 100644 --- a/base/shell.jl +++ b/base/shell.jl @@ -87,15 +87,18 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; elseif !in_single_quotes && c == '"' in_double_quotes = !in_double_quotes i = consume_upto!(arg, s, i, j) - elseif c == '\\' - if in_double_quotes + elseif !in_single_quotes && c == '\\' + if !isempty(st) && peek(st)[2] == '\n' + i = consume_upto!(arg, s, i, j) + 1 + _ = popfirst!(st) + elseif in_double_quotes isempty(st) && error("unterminated double quote") k, c′ = peek(st) if c′ == '"' || c′ == '$' || c′ == '\\' i = consume_upto!(arg, s, i, j) _ = popfirst!(st) end - elseif !in_single_quotes + else isempty(st) && error("dangling backslash") i = consume_upto!(arg, s, i, j) _ = popfirst!(st) diff --git a/doc/src/manual/strings.md b/doc/src/manual/strings.md index 4b3c35d5b45f6b..56a5a20c1cef49 100644 --- a/doc/src/manual/strings.md +++ b/doc/src/manual/strings.md @@ -166,6 +166,14 @@ julia> """Contains "quote" characters""" "Contains \"quote\" characters" ``` +Long lines in strings can be broken up by preceding the newline with a backslash (`\`): + +```jldoctest +julia> "This is a long \ + line" +"This is a long line" +``` + If you want to extract a character from a string, you index into it: ```jldoctest helloworldstring @@ -639,6 +647,15 @@ julia> """ "Hello,\nworld." ``` +If the newline is removed using a backslash, dedentation will be respected as well: + +```jldoctest +julia> """ + Averylong\ + word""" +"Averylongword" +``` + Trailing whitespace is left unaltered. Triple-quoted string literals can contain `"` characters without escaping. 
diff --git a/src/julia-parser.scm b/src/julia-parser.scm index f68e0d2c4ebb5c..70912c4272c8c7 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -311,6 +311,9 @@ (define (numchk n s) (or n (error (string "invalid numeric constant \"" s "\"")))) +(define (string-lastchar s) + (string.char s (string.dec s (length s)))) + (define (read-number port leadingdot neg) (let ((str (open-output-string)) (pred char-numeric?) @@ -412,7 +415,7 @@ (string.sub s 1) s) r is-float32-literal))) - (if (and (eqv? #\. (string.char s (string.dec s (length s)))) + (if (and (eqv? #\. (string-lastchar s)) (let ((nxt (peek-char port))) (and (not (eof-object? nxt)) (or (identifier-start-char? nxt) @@ -2182,16 +2185,35 @@ (define (unescape-parsed-string-literal strs) (map-at even? unescape-string strs)) +;; remove `\` followed by a newline +(define (strip-escaped-newline s) + (let ((in (open-input-string s)) + (out (open-output-string))) + (define (loop preceding-backslash?) + (let ((c (read-char in))) + (cond ((eof-object? c)) + (preceding-backslash? + (if (not (eqv? c #\newline)) + (begin (write-char #\\ out) (write-char c out))) + (loop #f)) + ((eqv? c #\\) (loop #t)) + (else (write-char c out) (loop #f))))) + (loop #f) + (io.tostring! out))) + (define (parse-string-literal s delim raw) - (let ((p (ts:port s))) - ((if raw identity unescape-parsed-string-literal) - (if (eqv? (peek-char p) delim) - (if (eqv? (peek-char (take-char p)) delim) - (map-first strip-leading-newline - (dedent-triplequoted-string - (parse-string-literal- 2 (take-char p) s delim raw))) - (list "")) - (parse-string-literal- 0 p s delim raw))))) + (let* ((p (ts:port s)) + (str (if (eqv? (peek-char p) delim) + (if (eqv? (peek-char (take-char p)) delim) + (map-first strip-leading-newline + (dedent-triplequoted-string + (parse-string-literal- 2 (take-char p) s delim raw))) + (list "")) + (parse-string-literal- 0 p s delim raw)))) + (if raw str (unescape-parsed-string-literal + (map (lambda (s) + (if (string? 
s) (strip-escaped-newline s) s)) + str))))) (define (strip-leading-newline s) (let ((n (sizeof s))) diff --git a/test/syntax.jl b/test/syntax.jl index 5a3af3b1863cb0..7cee3c0755f656 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -2830,3 +2830,100 @@ end x[3], x[1:2]... = x @test x == [2, 3, 1] end + +@testset "escaping newlines inside strings" begin + c = "c" + + @test "a\ +b" == "ab" + @test "a\ + b" == "a b" + @test raw"a\ +b" == "a\\\nb" + @test "a$c\ +b" == "acb" + @test "\\ +" == "\\\n" + + + @test """ + a\ + b""" == "ab" + @test """ + a\ + b""" == "a b" + @test """ + a\ + b""" == " ab" + @test raw""" + a\ + b""" == "a\\\nb" + @test """ + a$c\ + b""" == "acb" + + @test """ + \ + """ == "" + @test """ + \\ + """ == "\\\n" + @test """ + \\\ + """ == "\\" + @test """ + \\\\ + """ == "\\\\\n" + @test """ + \\\\\ + """ == "\\\\" + @test """ + \ + \ + """ == "" + @test """ + \\ + \ + """ == "\\\n" + @test """ + \\\ + \ + """ == "\\" + + + @test `a\ +b` == `ab` + @test `a\ + b` == `a b` + @test `a$c\ +b` == `acb` + @test `"a\ +b"` == `ab` + @test `'a\ +b'` == `$("a\\\nb")` + @test `\\ +` == `'\'` + + + @test ``` + a\ + b``` == `ab` + @test ``` + a\ + b``` == `a b` + @test ``` + a\ + b``` == ` ab` + @test ``` + a$c\ + b``` == `acb` + @test ``` + "a\ + b"``` == `ab` + @test ``` + 'a\ + b'``` == `$("a\\\nb")` + @test ``` + \\ + ``` == `'\'` +end