diff --git a/lib/ymlr/encode.ex b/lib/ymlr/encode.ex
index c2a6301..cc32ab5 100644
--- a/lib/ymlr/encode.ex
+++ b/lib/ymlr/encode.ex
@@ -38,6 +38,22 @@ defmodule Ymlr.Encode do
     ":"
   ]
 
+  # Characters with a short backslash escape inside a double-quoted scalar,
+  # zipped with the letter that follows the backslash.
+  @escape_chars ~c"\b\f\r\v\0\"\\"
+  @escape_char_mapping Enum.zip(@escape_chars, ~c"bfrv0\"\\")
+
+  # Codepoints written as \uXXXX: the remaining C0 controls, DEL and
+  # U+0080..U+00FF. Newline, tab and the short escapes above are left out.
+  @unicode_char_mapping 0x00..0x1F
+                        |> Enum.concat(0x7F..0xFF)
+                        |> Enum.reject(&(&1 in (~c"\n\t" ++ @escape_chars)))
+
+  # One-character strings whose presence forces double quotes. Each codepoint
+  # is UTF-8 encoded: a bare `<<c>>` for c >= 0x80 is a single raw byte that
+  # matches inside the encoding of unrelated multi-byte characters.
+  @require_double_quotes Enum.map(~c"\b\f\r\v\0" ++ @unicode_char_mapping, &(<<&1::utf8>>))
+
   @doc ~S"""
   Encodes the given data as YAML string. Raises if it cannot be encoded.
 
@@ -146,6 +162,7 @@ defmodule Ymlr.Encode do
   defp encode_binary(data, indent_level) do
     cond do
       data == "" -> ~S('')
+      data == "~" -> ~S('~')
       data == "\n" -> ~S("\n")
       data == "null" -> ~S('null')
       data == "yes" -> ~S('yes')
@@ -155,6 +172,7 @@ defmodule Ymlr.Encode do
       data == "True" -> ~S('True')
       data == "False" -> ~S('False')
       String.contains?(data, "\n") -> multiline(data, indent_level)
+      String.contains?(data, @require_double_quotes) -> with_double_quotes(data)
       String.at(data, 0) in @quote_when_first -> with_quotes(data)
       String.at(data, -1) in @quote_when_last -> with_quotes(data)
       String.starts_with?(data, "- ") -> with_quotes(data)
@@ -186,18 +204,38 @@ defmodule Ymlr.Encode do
 
   defp with_quotes(data) do
     if String.contains?(data, "'") do
-      ~s("#{escape(data)}")
+      with_double_quotes(data)
     else
-      ~s('#{data}')
+      with_single_quotes(data)
     end
   end
 
+  defp with_double_quotes(data) do
+    ~s("#{escape(data)}")
+  end
+
+  defp with_single_quotes(data), do: ~s('#{data}')
+
+  # Escapes `data` for use inside a double-quoted scalar, one codepoint at a
+  # time, and returns the result as a binary.
   defp escape(data) do
-    data
-    |> String.replace("\\", "\\\\")
-    |> String.replace(~S("), ~S(\"))
+    for <<char::utf8 <- data>>, into: "", do: escape_char(char)
   end
 
+  # One clause per character with a short escape, e.g. ?\r -> "\\r".
+  for {char, escaped} <- @escape_char_mapping do
+    defp escape_char(unquote(char)), do: <<?\\, unquote(escaped)>>
+  end
+
+  # One clause per remaining special codepoint, rendered as \uXXXX (upper-case hex).
+  for uchar <- @unicode_char_mapping do
+    unicode_sequence = List.to_string(:io_lib.format("\\u~4.16.0B", [uchar]))
+    defp escape_char(unquote(uchar)), do: unquote(unicode_sequence)
+  end
+
+  # Everything else is emitted unchanged.
+  defp escape_char(char), do: <<char::utf8>>
+
   # for example for map keys
   defp multiline(data, nil), do: inspect(data)
   # see https://yaml-multiline.info/
diff --git a/test/ymlr/encode_test.exs b/test/ymlr/encode_test.exs
index d673392..3361903 100644
--- a/test/ymlr/encode_test.exs
+++ b/test/ymlr/encode_test.exs
@@ -25,6 +25,7 @@ defmodule Ymlr.EncodeTest do
     test "quoted strings - avoid type confusion" do
       assert_identity_and_output("yes", ~S('yes'))
       assert_identity_and_output("no", ~S('no'))
+      assert_identity_and_output("~", "'~'")
       assert_identity_and_output("true", ~S('true'))
       assert_identity_and_output("false", ~S('false'))
       assert_identity_and_output("True", ~S('True'))
@@ -115,27 +116,17 @@ defmodule Ymlr.EncodeTest do
     # https://yaml.org/spec/1.2.2/#example-escaped-characters
     test "quoted strings - example-escaped-characters from 1.2.2 spec" do
       assert_identity_and_output("Fun with \\", "Fun with \\")
-    end
-
-    test "quoted strings - in map key (requires escape char)" do
-      assert_identity_and_output(%{"a\tb" => "value"}, ~s(a\tb: value))
-    end
-
-    @tag skip: "Identity test fails"
-    test "Special bytes" do
-      assert_identity_and_output(%{"a\rb" => "value"}, ~s(a\rb: value))
-      assert_identity_and_output("\n \r \t \u000b \u0000", "|-\n\n \r \t \v \0")
-
+      assert_identity_and_output("\r \t \u000b \u0000", "\"\\r \t \\v \\0\"")
       assert_identity_and_output(
         "\u0020 \u00a0 \u0085 \u2028 \u2029",
-        <<32, 32, 194, 160, 32, 194, 133, 32, 226, 128, 168, 32, 226, 128, 169>>
+        "\"  \\u00A0 \\u0085 \u2028 \u2029\""
       )
+      assert_identity_and_output("\" \u0007 \b \u001b \f", "\"\\\" \\u0007 \\b \\u001B \\f\"")
     end
 
-    @tag skip: "YamlElixir throws a parsing Error"
-    test "Special bytes 2" do
-      assert_identity_and_output("\" \u0007 \b \u001b \f", "'\" \a \b \e \f'")
-      assert_identity_and_output("\r \t \u000b \u0000", "'\r \t \v \0'")
+    test "quoted strings - in map key (requires escape char)" do
+      assert_identity_and_output(%{"a\tb" => "value"}, ~s(a\tb: value))
+      assert_identity_and_output(%{"a\rb" => "value"}, ~s("a\\rb": value))
     end
 
     test "newline in map key" do