From bd266048bf5b3346cbfab264e681b0e5b9af7f90 Mon Sep 17 00:00:00 2001 From: Michael Ruoss Date: Sun, 19 Nov 2023 22:30:35 +0100 Subject: [PATCH] performance optimizations --- BENCHMARK.md | 90 +++++++++++++++++++-------------------- lib/ymlr/encode.ex | 34 +++++++++++---- test/ymlr/encode_test.exs | 17 +++++--- 3 files changed, 83 insertions(+), 58 deletions(-) diff --git a/BENCHMARK.md b/BENCHMARK.md index 96afd57..71ba904 100644 --- a/BENCHMARK.md +++ b/BENCHMARK.md @@ -1,6 +1,6 @@ Benchmark -Benchmark run from 2023-11-19 13:22:29.792152Z UTC +Benchmark run from 2023-11-20 14:52:22.679140Z UTC ## System @@ -65,20 +65,20 @@ Run Time Jason - 284.78 - 3.51 ms - ±13.65% - 3.25 ms - 4.71 ms + 279.16 + 3.58 ms + ±13.81% + 3.34 ms + 4.81 ms Ymlr - 1.58 - 632.01 ms - ±0.31% - 632.22 ms - 634.95 ms + 3.64 + 274.95 ms + ±0.71% + 275.05 ms + 278.58 ms @@ -93,14 +93,14 @@ Run Time Comparison Slower Jason - 284.78 + 279.16   Ymlr - 1.58 - 179.98x + 3.64 + 76.76x @@ -122,8 +122,8 @@ Memory Usage Ymlr - 104.88 MB - 21.83x + 65.74 MB + 13.68x @@ -145,20 +145,20 @@ Run Time Jason - 127.80 - 7.82 ms - ±14.23% - 7.43 ms - 13.36 ms + 122.70 + 8.15 ms + ±14.44% + 7.75 ms + 13.64 ms Ymlr - 22.00 - 45.46 ms - ±19.58% - 45.77 ms - 58.62 ms + 21.67 + 46.15 ms + ±20.12% + 46.06 ms + 61.63 ms @@ -173,14 +173,14 @@ Run Time Comparison Slower Jason - 127.80 + 122.70   Ymlr - 22.00 - 5.81x + 21.67 + 5.66x @@ -202,7 +202,7 @@ Memory Usage Ymlr - 50.11 MB + 50.09 MB 5.43x @@ -225,20 +225,20 @@ Run Time Jason - 351.13 - 2.85 ms - ±246.31% - 2.60 ms - 4.17 ms + 376.78 + 2.65 ms + ±3.50% + 2.64 ms + 2.87 ms Ymlr - 2.35 - 426.22 ms - ±0.78% - 425.46 ms - 435.53 ms + 5.40 + 185.32 ms + ±0.98% + 184.82 ms + 190.55 ms @@ -253,14 +253,14 @@ Run Time Comparison Slower Jason - 351.13 + 376.78   Ymlr - 2.35 - 149.66x + 5.40 + 69.83x @@ -282,7 +282,7 @@ Memory Usage Ymlr - 78.99 MB - 31.06x + 52.62 MB + 20.69x \ No newline at end of file diff --git a/lib/ymlr/encode.ex b/lib/ymlr/encode.ex index 
485a620..be231ce 100644 --- a/lib/ymlr/encode.ex +++ b/lib/ymlr/encode.ex @@ -38,11 +38,29 @@ defmodule Ymlr.Encode do ":" ] - @escape_chars ~c"\b\f\r\v\0\"\\" - @escape_char_mapping Enum.zip(@escape_chars, ~c"bfrv0\"\\") - @unicode_chars Enum.to_list(0x00..0x1F) ++ Enum.to_list(0x7F..0xFF) - @unicode_char_mapping Enum.reject(@unicode_chars, &Kernel.in(&1, ~c"\n\t" ++ @escape_chars)) - @require_double_quotes Enum.map(~c"\b\f\r\v\0" ++ @unicode_char_mapping, &<<&1>>) + @printable_chars List.flatten([ + # Tab (\t) + 0x09, + # Line feed (LF \n) + 0x0A, + # Carriage Return (CR \r) + # 0x0D, theoretically printable, seems to require double quotes. + # Next Line (NEL) + 0x85, + # Printable ASCII + Enum.to_list(0x20..0x7E), + # Printable Latin-1 Supplement (lowest block of the BMP printable range) + Enum.to_list(0xA0..0xFF) + ]) + + @non_printable_chars Enum.to_list(0x00..0xA0) -- @printable_chars + @quoted_when_special @non_printable_chars ++ ~c"\a\b\e\f\v\0\u00a0\u0085\u2028\u2029" + @quoted_when_special_strings Enum.map(@quoted_when_special, &<<&1::utf8>>) + + # see https://yaml.org/spec/1.2.2/#57-escaped-characters + @escape_chars ~c"\a\b\e\f\r\v\0\u00a0\u0085\u2028\u2029\"\\" + @escape_char_mapping Enum.zip(@escape_chars, ~c"abefrv0_NLP\"\\") + @non_printable_special_chars @non_printable_chars -- @escape_chars @doc ~S""" Encodes the given data as YAML string. Raises if it cannot be encoded. 
@@ -162,7 +180,7 @@ defmodule Ymlr.Encode do data == "True" -> ~S('True') data == "False" -> ~S('False') String.contains?(data, "\n") -> multiline(data, indent_level) - String.contains?(data, @require_double_quotes) -> with_double_quotes(data) + String.contains?(data, @quoted_when_special_strings) -> with_double_quotes(data) String.at(data, 0) in @quote_when_first -> with_quotes(data) String.at(data, -1) in @quote_when_last -> with_quotes(data) String.starts_with?(data, "- ") -> with_quotes(data) @@ -216,8 +234,8 @@ defmodule Ymlr.Encode do defp escape_char(unquote(char)), do: <> end - for uchar <- @unicode_char_mapping do - unicode_sequence = List.to_string(:io_lib.format("\\u~4.16.0B", [uchar])) + for uchar <- @non_printable_special_chars do + unicode_sequence = List.to_string(:io_lib.format("\\x~2.16.0B", [uchar])) defp escape_char(unquote(uchar)), do: unquote(unicode_sequence) end diff --git a/test/ymlr/encode_test.exs b/test/ymlr/encode_test.exs index 1f94242..48a4782 100644 --- a/test/ymlr/encode_test.exs +++ b/test/ymlr/encode_test.exs @@ -115,16 +115,23 @@ defmodule Ymlr.EncodeTest do # https://yaml.org/spec/1.2.2/#example-escaped-characters test "quoted strings - example-escaped-characters from 1.2.2 spec" do - assert_identity_and_output("Fun with \\", "Fun with \\") - assert_identity_and_output("\r \t \u000b \u0000", "\"\\r \t \\v \\0\"") + assert_identity_and_output(~S(Fun with \\), ~S(Fun with \\)) + assert_identity_and_output("\" \u0007 \b \u001b \f", ~S("\" \a \b \e \f")) + # Line breaks inside scalar content must be normalized by the YAML processor. + # Each such line break must be parsed into a single line feed character. + # The original line break format is a presentation detail and must not be + # used to convey content information. + # I.e. the following cannot be tested for identity as \r will be parsed as \n. 
+ assert_output("\n\r \t \u000b \u0000", "|-\n\n \r \t \v \0") + assert_identity_and_output("\r \t \u000b \u0000", ~s("\\r \t \\v \\0")) assert_identity_and_output( "\u0020 \u00a0 \u0085 \u2028 \u2029", - "\" \\u00A0 \\u0085 \u2028 \u2029\"" + ~S(" \_ \N \L \P") ) - assert_identity_and_output("\" \u0007 \b \u001b \f", "\"\\\" \\u0007 \\b \\u001B \\f\"") - assert_identity_and_output("\r \t \u000b \u0000", "\"\\r \t \\v \\0\"") + # Possible formats: \x13 \u0013 \U00000013. We use \x13 + assert_identity_and_output("\u0013", "\"\\x13\"") end test "quoted strings - in map key (requires escape char)" do