From 8c18aeeca08f864f684e195fa5ab2c6360503e02 Mon Sep 17 00:00:00 2001 From: Michael Ruoss Date: Thu, 23 Nov 2023 18:05:02 +0100 Subject: [PATCH] performance optimizations --- BENCHMARK.md | 84 +++++++++++++++++++-------------------- lib/ymlr/encode.ex | 20 ++++++++-- test/ymlr/encode_test.exs | 7 +++- 3 files changed, 63 insertions(+), 48 deletions(-) diff --git a/BENCHMARK.md b/BENCHMARK.md index 3135265..70d8678 100644 --- a/BENCHMARK.md +++ b/BENCHMARK.md @@ -1,6 +1,6 @@ Benchmark -Benchmark run from 2023-11-21 21:51:23.068588Z UTC +Benchmark run from 2023-11-23 17:53:11.749718Z UTC ## System @@ -65,20 +65,20 @@ Run Time Jason - 273.46 - 3.66 ms - ±15.86% - 3.37 ms - 5.53 ms + 248.88 + 4.02 ms + ±14.75% + 3.81 ms + 5.72 ms Ymlr - 3.50 - 286.06 ms - ±0.60% - 286.03 ms - 289.08 ms + 3.27 + 305.72 ms + ±1.14% + 305.02 ms + 317.72 ms @@ -93,14 +93,14 @@ Run Time Comparison Slower Jason - 273.46 + 248.88   Ymlr - 3.50 - 78.23x + 3.27 + 76.09x @@ -122,8 +122,8 @@ Memory Usage Ymlr - 67.38 MB - 14.02x + 68.18 MB + 14.19x @@ -145,20 +145,20 @@ Run Time Jason - 127.15 - 7.86 ms - ±14.06% - 7.44 ms + 121.34 + 8.24 ms + ±142.36% + 7.35 ms 13.61 ms Ymlr - 22.26 - 44.91 ms - ±19.91% - 44.52 ms - 58.06 ms + 21.10 + 47.39 ms + ±62.18% + 43.84 ms + 319.16 ms @@ -173,14 +173,14 @@ Run Time Comparison Slower Jason - 127.15 + 121.34   Ymlr - 22.26 - 5.71x + 21.10 + 5.75x @@ -225,20 +225,20 @@ Run Time Jason - 391.80 - 2.55 ms - ±4.91% + 368.32 + 2.72 ms + ±251.44% 2.57 ms - 2.94 ms + 3.29 ms Ymlr - 4.93 - 202.80 ms - ±1.12% - 202.63 ms - 208.55 ms + 4.76 + 210.14 ms + ±0.45% + 209.83 ms + 211.96 ms @@ -253,14 +253,14 @@ Run Time Comparison Slower Jason - 391.80 + 368.32   Ymlr - 4.93 - 79.46x + 4.76 + 77.4x @@ -282,7 +282,7 @@ Memory Usage Ymlr - 65.35 MB - 25.69x + 65.86 MB + 25.89x \ No newline at end of file diff --git a/lib/ymlr/encode.ex b/lib/ymlr/encode.ex index 9a98ec8..a4c1ee5 100644 --- a/lib/ymlr/encode.ex +++ b/lib/ymlr/encode.ex @@ -125,12 +125,18 @@ defmodule Ymlr.Encode do # Printable ASCII Enum.to_list(0x20..0x7E), # Basic Multilingual Plane (BMP) - Enum.to_list(0xA0..0xFF) + Enum.to_list(0xA0..0xD7FF), + Enum.to_list(0xE000..0xFFFD), + # 32 bit + Enum.to_list(0x010000..0x10FFFF) ]) + @not_supported_by_elixir Enum.to_list(0xD800..0xDFFF) + # Non-Printable Characters (8-bit only for now) - all chars minus union of printable and escape chars: - @non_printable_chars Enum.to_list(0x00..0xFF) -- - (@printable_chars ++ @escape_if_within_double_quotes) + @non_printable_chars Enum.to_list(0..0x10FFFF) -- + (@printable_chars ++ + @escape_if_within_double_quotes ++ @not_supported_by_elixir) # Chars that, if contained within, force the string to be double-quoted: @chars_forcing_double_quotes_strings Enum.map( @@ -312,7 +318,13 @@ defmodule Ymlr.Encode do end for uchar <- @non_printable_chars do - unicode_sequence = List.to_string(:io_lib.format("\\x~2.16.0B", [uchar])) + unicode_sequence = + case uchar do + uchar when uchar <= 0xFF -> List.to_string(:io_lib.format("\\x~2.16.0B", [uchar])) + uchar when uchar <= 0xFFFF -> List.to_string(:io_lib.format("\\u~4.16.0B", [uchar])) + uchar -> List.to_string(:io_lib.format("\\U~6.16.0B", [uchar])) + end + defp escape_char(unquote(uchar)), do: unquote(unicode_sequence) end diff --git a/test/ymlr/encode_test.exs b/test/ymlr/encode_test.exs index 48a4782..a198484 100644 --- a/test/ymlr/encode_test.exs +++ b/test/ymlr/encode_test.exs @@ -130,8 +130,11 @@ defmodule Ymlr.EncodeTest do ~S(" \_ \N \L \P") ) - # Possible formats: \x13 \u0013 \U00000013. We use \x13 - assert_identity_and_output("\u0013", "\"\\x13\"") + # Possible formats: \x13 \u0013 \U00000013. + assert_identity_and_output( + "\u0013\uFFFD\uFFFE\u{10FFFF}", + "\"\\x13\uFFFD\\uFFFE\u{10FFFF}\"" + ) end test "quoted strings - in map key (requires escape char)" do