From 8dfc17880cd9bb49d66142d7402d8701292f133a Mon Sep 17 00:00:00 2001 From: pete gamache <pete@gamache.org> Date: Wed, 10 Jan 2018 06:56:00 -0500 Subject: [PATCH 1/7] Jason.Formatter for pretty-printing and minimizing This commit adds Jason.Formatter, which provides functions to pretty-print and minimize JSON-formatted data: pretty_print/2 pretty_print_to_iolist/2 minimize/2 minimize_to_iolist/2 Input must be 8 bits wide (e.g., UTF-8, Latin-1, etc.) and is accepted in `binary` and `iolist` format. --- formatter_test_suite/backslash-string.json | 2 + .../backslash-string.min.json | 1 + .../backslash-string.pretty.json | 3 + formatter_test_suite/empty-list.json | 2 + formatter_test_suite/empty-list.min.json | 1 + formatter_test_suite/empty-list.pretty.json | 1 + formatter_test_suite/empty-nest.json | 1 + formatter_test_suite/empty-nest.min.json | 1 + formatter_test_suite/empty-nest.pretty.json | 3 + formatter_test_suite/empty-object.json | 1 + formatter_test_suite/empty-object.min.json | 1 + formatter_test_suite/empty-object.pretty.json | 1 + formatter_test_suite/multiple-objects.json | 4 + .../multiple-objects.min.json | 4 + .../multiple-objects.pretty.json | 18 ++ formatter_test_suite/simple-list.json | 1 + formatter_test_suite/simple-list.min.json | 1 + formatter_test_suite/simple-list.pretty.json | 5 + formatter_test_suite/simple-object.json | 2 + formatter_test_suite/simple-object.min.json | 1 + formatter_test_suite/simple-object.min.json~ | 1 + .../simple-object.pretty.json | 3 + lib/formatter.ex | 289 ++++++++++++++++++ test/formatter_test.exs | 68 +++++ 24 files changed, 415 insertions(+) create mode 100644 formatter_test_suite/backslash-string.json create mode 100644 formatter_test_suite/backslash-string.min.json create mode 100644 formatter_test_suite/backslash-string.pretty.json create mode 100644 formatter_test_suite/empty-list.json create mode 100644 formatter_test_suite/empty-list.min.json create mode 100644 formatter_test_suite/empty-list.pretty.json create mode 100644 formatter_test_suite/empty-nest.json create mode 100644 formatter_test_suite/empty-nest.min.json create mode 100644 formatter_test_suite/empty-nest.pretty.json create mode 100644 formatter_test_suite/empty-object.json create mode 100644 formatter_test_suite/empty-object.min.json create mode 100644 formatter_test_suite/empty-object.pretty.json create mode 100644 formatter_test_suite/multiple-objects.json create mode 100644 formatter_test_suite/multiple-objects.min.json create mode 100644 formatter_test_suite/multiple-objects.pretty.json create mode 100644 formatter_test_suite/simple-list.json create mode 100644 formatter_test_suite/simple-list.min.json create mode 100644 formatter_test_suite/simple-list.pretty.json create mode 100644 formatter_test_suite/simple-object.json create mode 100644 formatter_test_suite/simple-object.min.json create mode 100644 formatter_test_suite/simple-object.min.json~ create mode 100644 formatter_test_suite/simple-object.pretty.json create mode 100644 lib/formatter.ex create mode 100644 test/formatter_test.exs diff --git a/formatter_test_suite/backslash-string.json b/formatter_test_suite/backslash-string.json new file mode 100644 index 0000000..a6ba670 --- /dev/null +++ b/formatter_test_suite/backslash-string.json @@ -0,0 +1,2 @@ +{"s": "\"string \nwith \nback \n slashes\\\""} + diff --git a/formatter_test_suite/backslash-string.min.json b/formatter_test_suite/backslash-string.min.json new file mode 100644 index 0000000..7f843a7 --- /dev/null +++ b/formatter_test_suite/backslash-string.min.json @@ -0,0 +1 @@ +{"s":"\"string \nwith \nback \n slashes\\\""} \ No newline at end of file diff --git a/formatter_test_suite/backslash-string.pretty.json b/formatter_test_suite/backslash-string.pretty.json new file mode 100644 index 0000000..abc0d9d --- /dev/null +++ b/formatter_test_suite/backslash-string.pretty.json @@ -0,0 +1,3 @@ +{ + "s": "\"string \nwith \nback \n slashes\\\"" +} \ No newline at end of file diff --git a/formatter_test_suite/empty-list.json b/formatter_test_suite/empty-list.json new file mode 100644 index 0000000..7dd4387 --- /dev/null +++ b/formatter_test_suite/empty-list.json @@ -0,0 +1,2 @@ +[] + diff --git a/formatter_test_suite/empty-list.min.json b/formatter_test_suite/empty-list.min.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/formatter_test_suite/empty-list.min.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/formatter_test_suite/empty-list.pretty.json b/formatter_test_suite/empty-list.pretty.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/formatter_test_suite/empty-list.pretty.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/formatter_test_suite/empty-nest.json b/formatter_test_suite/empty-nest.json new file mode 100644 index 0000000..1721244 --- /dev/null +++ b/formatter_test_suite/empty-nest.json @@ -0,0 +1 @@ +[[]] diff --git a/formatter_test_suite/empty-nest.min.json b/formatter_test_suite/empty-nest.min.json new file mode 100644 index 0000000..00a55f0 --- /dev/null +++ b/formatter_test_suite/empty-nest.min.json @@ -0,0 +1 @@ +[[]] \ No newline at end of file diff --git a/formatter_test_suite/empty-nest.pretty.json b/formatter_test_suite/empty-nest.pretty.json new file mode 100644 index 0000000..b55ac47 --- /dev/null +++ b/formatter_test_suite/empty-nest.pretty.json @@ -0,0 +1,3 @@ +[ + [] +] \ No newline at end of file diff --git a/formatter_test_suite/empty-object.json b/formatter_test_suite/empty-object.json new file mode 100644 index 0000000..0967ef4 --- /dev/null +++ b/formatter_test_suite/empty-object.json @@ -0,0 +1 @@ +{} diff --git a/formatter_test_suite/empty-object.min.json b/formatter_test_suite/empty-object.min.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/formatter_test_suite/empty-object.min.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/formatter_test_suite/empty-object.pretty.json b/formatter_test_suite/empty-object.pretty.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/formatter_test_suite/empty-object.pretty.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/formatter_test_suite/multiple-objects.json b/formatter_test_suite/multiple-objects.json new file mode 100644 index 0000000..665ef3e --- /dev/null +++ b/formatter_test_suite/multiple-objects.json @@ -0,0 +1,4 @@ +{"a": 1} +{"b": 2}{"a": {} } +{"c": {"d": [true, false, null]}} + diff --git a/formatter_test_suite/multiple-objects.min.json b/formatter_test_suite/multiple-objects.min.json new file mode 100644 index 0000000..215013b --- /dev/null +++ b/formatter_test_suite/multiple-objects.min.json @@ -0,0 +1,4 @@ +{"a":1} +{"b":2} +{"a":{}} +{"c":{"d":[true,false,null]}} \ No newline at end of file diff --git a/formatter_test_suite/multiple-objects.pretty.json b/formatter_test_suite/multiple-objects.pretty.json new file mode 100644 index 0000000..6150ba6 --- /dev/null +++ b/formatter_test_suite/multiple-objects.pretty.json @@ -0,0 +1,18 @@ +{ + "a": 1 +} +{ + "b": 2 +} +{ + "a": {} +} +{ + "c": { + "d": [ + true, + false, + null + ] + } +} \ No newline at end of file diff --git a/formatter_test_suite/simple-list.json b/formatter_test_suite/simple-list.json new file mode 100644 index 0000000..acfb21f --- /dev/null +++ b/formatter_test_suite/simple-list.json @@ -0,0 +1 @@ +[1, true, "three"] diff --git a/formatter_test_suite/simple-list.min.json b/formatter_test_suite/simple-list.min.json new file mode 100644 index 0000000..cd29fcc --- /dev/null +++ b/formatter_test_suite/simple-list.min.json @@ -0,0 +1 @@ +[1,true,"three"] \ No newline at end of file diff --git a/formatter_test_suite/simple-list.pretty.json b/formatter_test_suite/simple-list.pretty.json new file mode 100644 index 0000000..59f0655 --- /dev/null +++ b/formatter_test_suite/simple-list.pretty.json @@ -0,0 +1,5 @@ +[ + 1, + true, + "three" +] \ No newline at end of file diff --git a/formatter_test_suite/simple-object.json b/formatter_test_suite/simple-object.json new file mode 100644 index 0000000..f889d02 --- /dev/null +++ b/formatter_test_suite/simple-object.json @@ -0,0 +1,2 @@ +{"a": 1} + diff --git a/formatter_test_suite/simple-object.min.json b/formatter_test_suite/simple-object.min.json new file mode 100644 index 0000000..daa5053 --- /dev/null +++ b/formatter_test_suite/simple-object.min.json @@ -0,0 +1 @@ +{"a":1} \ No newline at end of file diff --git a/formatter_test_suite/simple-object.min.json~ b/formatter_test_suite/simple-object.min.json~ new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/formatter_test_suite/simple-object.min.json~ @@ -0,0 +1 @@ + diff --git a/formatter_test_suite/simple-object.pretty.json b/formatter_test_suite/simple-object.pretty.json new file mode 100644 index 0000000..885e71c --- /dev/null +++ b/formatter_test_suite/simple-object.pretty.json @@ -0,0 +1,3 @@ +{ + "a": 1 +} \ No newline at end of file diff --git a/lib/formatter.ex b/lib/formatter.ex new file mode 100644 index 0000000..f2e2748 --- /dev/null +++ b/lib/formatter.ex @@ -0,0 +1,289 @@ +defmodule Jason.Formatter do + @moduledoc ~S""" + `Jason.Formatter` provides pretty-printing and minimizing functions for + JSON-encoded data. + + Input is required to be in an 8-bit-wide encoding such as UTF-8 or Latin-1, + and is accepted in `iodata` (`binary` or `iolist`) format. + + Output is provided in either `binary` or `iolist` format. + """ + + @type opts :: [ + {:indent, iodata} | + {:line_separator, iodata} | + {:record_separator, iodata} | + {:after_colon, iodata} + ] + + + @doc ~S""" + Returns a binary containing a pretty-printed representation of + JSON-encoded `iodata`. + + `iodata` may contain multiple JSON objects or arrays, optionally separated + by whitespace (e.g., one object per line). Objects in `pretty_print`ed + output will be separated by newlines. No trailing newline is emitted. + + Options: + + * `:indent` sets the indentation string used for nested objects and + arrays. The default indent setting is two spaces (`" "`). + * `:line_separator` sets the newline string used in nested objects. + The default setting is a line feed (`"\n"`). + * `:record_separator` sets the string printed between root-level objects + and arrays. The default setting is `opts[:line_separator]`. + * `:after_colon` sets the string printed after a colon inside objects. + The default setting is one space (`" "`). + + Example: + + iex> Jason.Formatter.pretty_print(~s|{"a":{"b": [1, 2]}}|) + ~s|{ + "a": { + "b": [ + 1, + 2 + ] + } + }| + """ + @spec pretty_print(iodata, opts) :: binary + def pretty_print(iodata, opts \\ []) do + pretty_print_to_iolist(iodata, opts) + |> :erlang.list_to_binary + end + + + @doc ~S""" + Returns an iolist containing a pretty-printed representation of + JSON-encoded `iodata`. + + See `pretty_print/2` for details and options. + """ + @spec pretty_print_to_iolist(iodata, opts) :: iolist + def pretty_print_to_iolist(iodata, opts \\ []) do + depth = 0 + in_str = false + in_bs = false + empty = false + first = true + opts = normalize_opts(opts) + + {iolist, _state} = + pp_iodata(iodata, [], depth, in_str, in_bs, empty, first, opts) + + iolist + end + + + @doc ~S""" + Returns a binary containing a minimized representation of + JSON-encoded `iodata`. + + `iodata` may contain multiple JSON objects or arrays, optionally + separated by whitespace (e.g., one object per line). `minimize`d + output will contain one object per line. No trailing newline is emitted. + + The `:record_separator` option may be given to control the string + used as newline (default `"\n"`). Other options are ignored. + + Example: + + iex> Jason.Formatter.minimize(~s|{ "a" : "b" , "c": \n\n 2}|) + ~s|{"a":"b","c":2}| + """ + @spec minimize(iodata, opts) :: binary + def minimize(iodata, opts \\ []) do + minimize_to_iolist(iodata, opts) + |> :erlang.list_to_binary + end + + + @doc ~S""" + Returns an iolist containing a minimized representation of + JSON-encoded `iodata`. + + See `minimize/2` for details and options. + """ + @spec minimize_to_iolist(iodata, opts) :: iolist + def minimize_to_iolist(iodata, opts) do + pretty_print_to_iolist( + iodata, + indent: "", + line_separator: "", + record_separator: opts[:record_separator] || "\n", + after_colon: "" + ) + end + + + ## Returns a copy of `opts` with defaults applied + @spec normalize_opts(keyword) :: opts + defp normalize_opts(opts) do + [ + indent: opts[:indent] || " ", + line_separator: opts[:line_separator] || "\n", + record_separator: opts[:record_separator] || opts[:line_separator] || "\n", + after_colon: opts[:after_colon] || " ", + ] + end + + + ## Returns an iolist containing `depth` instances of `opts[:indent]` + @spec tab(opts, non_neg_integer, iolist) :: iolist + defp tab(opts, depth, output \\ []) do + if depth < 1 do + output + else + tab(opts, depth-1, [opts[:indent] | output]) + end + end + + + @typep pp_state :: { + non_neg_integer, ## depth -- current nesting depth + boolean, ## in_str -- is the current byte in a string? + boolean, ## in_bs -- does the current byte follow a backslash in a string? + boolean, ## empty -- is the current object or array empty? + boolean, ## first -- is this the first object or array in the input? + } + + @spec pp_iodata( + iodata, ## input -- input data + iolist, ## output_acc -- output iolist (built in reverse order) + non_neg_integer, ## depth -- current nesting depth + boolean, ## in_str -- is the current byte in a string? + boolean, ## in_bs -- does the current byte follow a backslash in a string? + boolean, ## empty -- is the current object or array empty? + boolean, ## first -- is this the first object or array in the input? + opts + ) :: {iolist, pp_state} + defp pp_iodata(input, output_acc, depth, in_str, in_bs, empty, first, opts) + + defp pp_iodata("", output_acc, depth, in_str, in_bs, empty, first, opts) do + {:lists.reverse(output_acc), {depth, in_str, in_bs, empty, first, opts}} + end + + defp pp_iodata([], output_acc, depth, in_str, in_bs, empty, first, opts) do + {:lists.reverse(output_acc), {depth, in_str, in_bs, empty, first, opts}} + end + + defp pp_iodata(<<byte::size(8), rest::binary>>, output_acc, depth, in_str, in_bs, empty, first, opts) do + pp_byte(byte, rest, output_acc, depth, in_str, in_bs, empty, first, opts) + end + + defp pp_iodata(byte, output_acc, depth, in_str, in_bs, empty, first, opts) when is_number(byte) do + pp_byte(byte, [], output_acc, depth, in_str, in_bs, empty, first, opts) + end + + defp pp_iodata(list, output_acc, depth, in_str, in_bs, empty, first, opts) when is_list(list) do + starting_state = {depth, in_str, in_bs, empty, first, opts} + {reversed_output, end_state} = Enum.reduce list, {[], starting_state}, fn (item, {output_acc, state}) -> + {depth, in_str, in_bs, empty, first, opts} = state + {item_output, new_state} = pp_iodata(item, [], depth, in_str, in_bs, empty, first, opts) + {[item_output | output_acc], new_state} + end + {[:lists.reverse(reversed_output) | output_acc], end_state} + end + + + @spec pp_byte( + byte, ## byte -- current byte + iodata, ## rest -- rest of input data + iolist, ## output -- output iolist (built in reverse order) + non_neg_integer, ## depth -- current nesting depth + boolean, ## in_str -- is the current byte in a string? + boolean, ## in_bs -- does the current byte follow a backslash in a string? + boolean, ## empty -- is the current object or array empty? + boolean, ## first -- is this the first object or array in the input? + opts + ) :: {iolist, pp_state} + defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts) + + ## in string, following backslash + defp pp_byte(byte, rest, output, depth, true=in_str, true=_in_bs, empty, first, opts) do + in_bs = false + pp_iodata(rest, [byte | output], depth, in_str, in_bs, empty, first, opts) + end + + ## in string, backslash + defp pp_byte(byte, rest, output, depth, true=in_str, _in_bs, empty, first, opts) + when byte in '\\' do + in_bs = true + pp_iodata(rest, [byte | output], depth, in_str, in_bs, empty, first, opts) + end + + ## in string, end quote + defp pp_byte(byte, rest, output, depth, true=_in_str, in_bs, empty, first, opts) + when byte in '"' do + in_str = false + pp_iodata(rest, [byte | output], depth, in_str, in_bs, empty, first, opts) + end + + ## in string, other character + defp pp_byte(byte, rest, output, depth, true=in_str, in_bs, empty, first, opts) do + pp_iodata(rest, [byte | output], depth, in_str, in_bs, empty, first, opts) + end + + ## out of string, whitespace + defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts) + when byte in ' \n\r\t' do + pp_iodata(rest, output, depth, in_str, in_bs, empty, first, opts) + end + + ## out of string, start block + defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts) + when byte in '{[' do + out = cond do + first -> byte + empty -> [opts[:line_separator], tab(opts, depth), byte] + depth == 0 -> [opts[:record_separator], byte] + :else -> byte + end + first = false + empty = true + depth = depth + 1 + pp_iodata(rest, [out | output], depth, in_str, in_bs, empty, first, opts) + end + + ## out of string, end empty block + defp pp_byte(byte, rest, output, depth, in_str, in_bs, true=_empty, first, opts) + when byte in '}]' do + empty = false + depth = depth - 1 + pp_iodata(rest, [byte | output], depth, in_str, in_bs, empty, first, opts) + end + + ## out of string, end non-empty block + defp pp_byte(byte, rest, output, depth, in_str, in_bs, false=empty, first, opts) + when byte in '}]' do + depth = depth - 1 + out = [opts[:line_separator], tab(opts, depth), byte] + pp_iodata(rest, [out | output], depth, in_str, in_bs, empty, first, opts) + end + + ## out of string, comma + defp pp_byte(byte, rest, output, depth, in_str, in_bs, _empty, first, opts) + when byte in ',' do + empty = false + out = [byte, opts[:line_separator], tab(opts, depth)] + pp_iodata(rest, [out | output], depth, in_str, in_bs, empty, first, opts) + end + + ## out of string, colon + defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts) + when byte in ':' do + out = [byte, opts[:after_colon]] + pp_iodata(rest, [out | output], depth, in_str, in_bs, empty, first, opts) + end + + ## out of string, other character (maybe start quote) + defp pp_byte(byte, rest, output, depth, _in_str, in_bs, empty, first, opts) do + out = if empty, do: [opts[:line_separator], tab(opts, depth), byte], else: byte + in_str = byte in '"' + empty = false + pp_iodata(rest, [out | output], depth, in_str, in_bs, empty, first, opts) + end +end + diff --git a/test/formatter_test.exs b/test/formatter_test.exs new file mode 100644 index 0000000..32feec7 --- /dev/null +++ b/test/formatter_test.exs @@ -0,0 +1,68 @@ +defmodule Jason.FormatterTest do + use ExUnit.Case, async: true + import Jason.Formatter + doctest Jason.Formatter + + @test_cases [ + "empty-list", + "empty-object", + "simple-list", + "simple-object", + "multiple-objects", + "backslash-string", + "empty-nest", + ] + + for name <- @test_cases do + input = File.open!("formatter_test_suite/#{name}.json") |> IO.binread(:all) + pretty = File.open!("formatter_test_suite/#{name}.pretty.json") |> IO.binread(:all) + min = File.open!("formatter_test_suite/#{name}.min.json") |> IO.binread(:all) + + test "#{name} |> pretty_print" do + assert(pretty_print(unquote(input)) == unquote(pretty)) + end + + test "#{name} |> minimize" do + assert(minimize(unquote(input)) == unquote(min)) + end + + test "#{name} |> pretty_print |> pretty_print" do + p = unquote(input) |> pretty_print |> pretty_print + assert(p == unquote(pretty)) + end + + test "#{name} |> minimize |> minimize" do + m = unquote(input) |> minimize |> minimize + assert(m == unquote(min)) + end + + test "#{name} |> pretty_print |> minimize |> pretty_print" do + p = unquote(input) |> pretty_print |> minimize |> pretty_print + assert(p == unquote(pretty)) + end + + test "#{name} |> minimize |> pretty_print |> minimize" do + m = unquote(input) |> minimize |> pretty_print |> minimize + assert(m == unquote(min)) + end + end + + test "pretty_print iolist" do + input = [['{"a":', " 3.14159", []], [[44]], "\"b\":", '1}'] + output = ~s|{\n "a": 3.14159,\n "b": 1\n}| + assert(pretty_print(input) == output) + end + + test "minimize iolist" do + input = [['{\n"a": ', " 3.14159", []], [[44], '"'], "b\":\t", '1\n\n}'] + output = ~s|{"a":3.14159,"b":1}| + assert(minimize(input) == output) + end + + test "pretty_print indent string" do + input = ~s|{"a": {"b": [true, false]}}| + output = ~s|{\n\t"a": {\n\t\t"b": [\n\t\t\ttrue,\n\t\t\tfalse\n\t\t]\n\t}\n}| + assert(pretty_print(input, indent: "\t") == output) + end +end + From 7740ade0e7972ea6f50a054b642f3d94e425cdd7 Mon Sep 17 00:00:00 2001 From: pete gamache <pete@gamache.org> Date: Wed, 28 Mar 2018 10:50:14 -0400 Subject: [PATCH 2/7] superficial changes to formatter --- lib/formatter.ex | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/lib/formatter.ex b/lib/formatter.ex index f2e2748..d927abb 100644 --- a/lib/formatter.ex +++ b/lib/formatter.ex @@ -50,8 +50,8 @@ defmodule Jason.Formatter do """ @spec pretty_print(iodata, opts) :: binary def pretty_print(iodata, opts \\ []) do - pretty_print_to_iolist(iodata, opts) - |> :erlang.list_to_binary + pretty_print_to_iodata(iodata, opts) + |> IO.iodata_to_binary end @@ -61,8 +61,8 @@ defmodule Jason.Formatter do See `pretty_print/2` for details and options. """ - @spec pretty_print_to_iolist(iodata, opts) :: iolist - def pretty_print_to_iolist(iodata, opts \\ []) do + @spec pretty_print_to_iodata(iodata, opts) :: iodata + def pretty_print_to_iodata(iodata, opts \\ []) do depth = 0 in_str = false in_bs = false @@ -70,10 +70,10 @@ defmodule Jason.Formatter do first = true opts = normalize_opts(opts) - {iolist, _state} = + {output, _state} = pp_iodata(iodata, [], depth, in_str, in_bs, empty, first, opts) - iolist + output end @@ -95,8 +95,8 @@ defmodule Jason.Formatter do """ @spec minimize(iodata, opts) :: binary def minimize(iodata, opts \\ []) do - minimize_to_iolist(iodata, opts) - |> :erlang.list_to_binary + minimize_to_iodata(iodata, opts) + |> IO.iodata_to_binary end @@ -106,9 +106,9 @@ defmodule Jason.Formatter do See `minimize/2` for details and options. """ - @spec minimize_to_iolist(iodata, opts) :: iolist - def minimize_to_iolist(iodata, opts) do - pretty_print_to_iolist( + @spec minimize_to_iodata(iodata, opts) :: iodata + def minimize_to_iodata(iodata, opts) do + pretty_print_to_iodata( iodata, indent: "", line_separator: "", @@ -151,14 +151,14 @@ defmodule Jason.Formatter do @spec pp_iodata( iodata, ## input -- input data - iolist, ## output_acc -- output iolist (built in reverse order) + iodata, ## output_acc -- output iolist (built in reverse order) non_neg_integer, ## depth -- current nesting depth boolean, ## in_str -- is the current byte in a string? boolean, ## in_bs -- does the current byte follow a backslash in a string? boolean, ## empty -- is the current object or array empty? boolean, ## first -- is this the first object or array in the input? opts - ) :: {iolist, pp_state} + ) :: {iodata, pp_state} defp pp_iodata(input, output_acc, depth, in_str, in_bs, empty, first, opts) defp pp_iodata("", output_acc, depth, in_str, in_bs, empty, first, opts) do @@ -173,7 +173,7 @@ defmodule Jason.Formatter do pp_byte(byte, rest, output_acc, depth, in_str, in_bs, empty, first, opts) end - defp pp_iodata(byte, output_acc, depth, in_str, in_bs, empty, first, opts) when is_number(byte) do + defp pp_iodata(byte, output_acc, depth, in_str, in_bs, empty, first, opts) when is_integer(byte) do pp_byte(byte, [], output_acc, depth, in_str, in_bs, empty, first, opts) end @@ -191,14 +191,14 @@ defmodule Jason.Formatter do @spec pp_byte( byte, ## byte -- current byte iodata, ## rest -- rest of input data - iolist, ## output -- output iolist (built in reverse order) + iodata, ## output -- output iolist (built in reverse order) non_neg_integer, ## depth -- current nesting depth boolean, ## in_str -- is the current byte in a string? boolean, ## in_bs -- does the current byte follow a backslash in a string? boolean, ## empty -- is the current object or array empty? boolean, ## first -- is this the first object or array in the input? opts - ) :: {iolist, pp_state} + ) :: {iodata, pp_state} defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts) ## in string, following backslash @@ -239,7 +239,7 @@ defmodule Jason.Formatter do first -> byte empty -> [opts[:line_separator], tab(opts, depth), byte] depth == 0 -> [opts[:record_separator], byte] - :else -> byte + true -> byte end first = false empty = true From 2438992aed7471e2d95cdbceec3ad8bfb6793364 Mon Sep 17 00:00:00 2001 From: pete gamache <pete@gamache.org> Date: Wed, 28 Mar 2018 13:51:31 -0400 Subject: [PATCH 3/7] build formatter output in FIFO order --- lib/formatter.ex | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/formatter.ex b/lib/formatter.ex index d927abb..f1dec52 100644 --- a/lib/formatter.ex +++ b/lib/formatter.ex @@ -162,11 +162,11 @@ defmodule Jason.Formatter do defp pp_iodata(input, output_acc, depth, in_str, in_bs, empty, first, opts) defp pp_iodata("", output_acc, depth, in_str, in_bs, empty, first, opts) do - {:lists.reverse(output_acc), {depth, in_str, in_bs, empty, first, opts}} + {output_acc, {depth, in_str, in_bs, empty, first, opts}} end defp pp_iodata([], output_acc, depth, in_str, in_bs, empty, first, opts) do - {:lists.reverse(output_acc), {depth, in_str, in_bs, empty, first, opts}} + {output_acc, {depth, in_str, in_bs, empty, first, opts}} end defp pp_iodata(<<byte::size(8), rest::binary>>, output_acc, depth, in_str, in_bs, empty, first, opts) do @@ -182,9 +182,9 @@ defmodule Jason.Formatter do {reversed_output, end_state} = Enum.reduce list, {[], starting_state}, fn (item, {output_acc, state}) -> {depth, in_str, in_bs, empty, first, opts} = state {item_output, new_state} = pp_iodata(item, [], depth, in_str, in_bs, empty, first, opts) - {[item_output | output_acc], new_state} + {[output_acc, item_output], new_state} end - {[:lists.reverse(reversed_output) | output_acc], end_state} + {[output_acc, reversed_output], end_state} end @@ -204,26 +204,26 @@ defmodule Jason.Formatter do ## in string, following backslash defp pp_byte(byte, rest, output, depth, true=in_str, true=_in_bs, empty, first, opts) do in_bs = false - pp_iodata(rest, [byte | output], depth, in_str, in_bs, empty, first, opts) + pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts) end ## in string, backslash defp pp_byte(byte, rest, output, depth, true=in_str, _in_bs, empty, first, opts) when byte in '\\' do in_bs = true - pp_iodata(rest, [byte | output], depth, in_str, in_bs, empty, first, opts) + pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts) end ## in string, end quote defp pp_byte(byte, rest, output, depth, true=_in_str, in_bs, empty, first, opts) when byte in '"' do in_str = false - pp_iodata(rest, [byte | output], depth, in_str, in_bs, empty, first, opts) + pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts) end ## in string, other character defp pp_byte(byte, rest, output, depth, true=in_str, in_bs, empty, first, opts) do - pp_iodata(rest, [byte | output], depth, in_str, in_bs, empty, first, opts) + pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts) end ## out of string, whitespace @@ -244,7 +244,7 @@ defmodule Jason.Formatter do first = false empty = true depth = depth + 1 - pp_iodata(rest, [out | output], depth, in_str, in_bs, empty, first, opts) + pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) end ## out of string, end empty block @@ -252,7 +252,7 @@ defmodule Jason.Formatter do when byte in '}]' do empty = false depth = depth - 1 - pp_iodata(rest, [byte | output], depth, in_str, in_bs, empty, first, opts) + pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts) end ## out of string, end non-empty block @@ -260,7 +260,7 @@ defmodule Jason.Formatter do when byte in '}]' do depth = depth - 1 out = [opts[:line_separator], tab(opts, depth), byte] - pp_iodata(rest, [out | output], depth, in_str, in_bs, empty, first, opts) + pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) end ## out of string, comma @@ -268,14 +268,14 @@ defmodule Jason.Formatter do when byte in ',' do empty = false out = [byte, opts[:line_separator], tab(opts, depth)] - pp_iodata(rest, [out | output], depth, in_str, in_bs, empty, first, opts) + pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) end ## out of string, colon defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts) when byte in ':' do out = [byte, opts[:after_colon]] - pp_iodata(rest, [out | output], depth, in_str, in_bs, empty, first, opts) + pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) end ## out of string, other character (maybe start quote) @@ -283,7 +283,7 @@ defmodule Jason.Formatter do out = if empty, do: [opts[:line_separator], tab(opts, depth), byte], else: byte in_str = byte in '"' empty = false - pp_iodata(rest, [out | output], depth, in_str, in_bs, empty, first, opts) + pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) end end From e9659ed8da4bff60b3a1bdd5ad15ab89610055e0 Mon Sep 17 00:00:00 2001 From: pete gamache <pete@gamache.org> Date: Wed, 28 Mar 2018 14:42:30 -0400 Subject: [PATCH 4/7] applied mix format to formatter and formatter_test --- lib/formatter.ex | 171 +++++++++++++++++++++++----------------- test/formatter_test.exs | 3 +- 2 files changed, 99 insertions(+), 75 deletions(-) diff --git a/lib/formatter.ex b/lib/formatter.ex index f1dec52..9e6a438 100644 --- a/lib/formatter.ex +++ b/lib/formatter.ex @@ -10,12 +10,11 @@ defmodule Jason.Formatter do """ @type opts :: [ - {:indent, iodata} | - {:line_separator, iodata} | - {:record_separator, iodata} | - {:after_colon, iodata} - ] - + {:indent, iodata} + | {:line_separator, iodata} + | {:record_separator, iodata} + | {:after_colon, iodata} + ] @doc ~S""" Returns a binary containing a pretty-printed representation of @@ -51,10 +50,9 @@ defmodule Jason.Formatter do @spec pretty_print(iodata, opts) :: binary def pretty_print(iodata, opts \\ []) do pretty_print_to_iodata(iodata, opts) - |> IO.iodata_to_binary + |> IO.iodata_to_binary() end - @doc ~S""" Returns an iolist containing a pretty-printed representation of JSON-encoded `iodata`. @@ -70,13 +68,11 @@ defmodule Jason.Formatter do first = true opts = normalize_opts(opts) - {output, _state} = - pp_iodata(iodata, [], depth, in_str, in_bs, empty, first, opts) + {output, _state} = pp_iodata(iodata, [], depth, in_str, in_bs, empty, first, opts) output end - @doc ~S""" Returns a binary containing a minimized representation of JSON-encoded `iodata`. @@ -96,10 +92,9 @@ defmodule Jason.Formatter do @spec minimize(iodata, opts) :: binary def minimize(iodata, opts \\ []) do minimize_to_iodata(iodata, opts) - |> IO.iodata_to_binary + |> IO.iodata_to_binary() end - @doc ~S""" Returns an iolist containing a minimized representation of JSON-encoded `iodata`. @@ -117,7 +112,6 @@ defmodule Jason.Formatter do ) end - ## Returns a copy of `opts` with defaults applied @spec normalize_opts(keyword) :: opts defp normalize_opts(opts) do @@ -125,40 +119,50 @@ defmodule Jason.Formatter do indent: opts[:indent] || " ", line_separator: opts[:line_separator] || "\n", record_separator: opts[:record_separator] || opts[:line_separator] || "\n", - after_colon: opts[:after_colon] || " ", + after_colon: opts[:after_colon] || " " ] end - ## Returns an iolist containing `depth` instances of `opts[:indent]` @spec tab(opts, non_neg_integer, iolist) :: iolist defp tab(opts, depth, output \\ []) do if depth < 1 do output else - tab(opts, depth-1, [opts[:indent] | output]) + tab(opts, depth - 1, [opts[:indent] | output]) end end - @typep pp_state :: { - non_neg_integer, ## depth -- current nesting depth - boolean, ## in_str -- is the current byte in a string? - boolean, ## in_bs -- does the current byte follow a backslash in a string? - boolean, ## empty -- is the current object or array empty? - boolean, ## first -- is this the first object or array in the input? - } + ## depth -- current nesting depth + non_neg_integer, + ## in_str -- is the current byte in a string? + boolean, + ## in_bs -- does the current byte follow a backslash in a string? + boolean, + ## empty -- is the current object or array empty? + boolean, + ## first -- is this the first object or array in the input? + boolean + } @spec pp_iodata( - iodata, ## input -- input data - iodata, ## output_acc -- output iolist (built in reverse order) - non_neg_integer, ## depth -- current nesting depth - boolean, ## in_str -- is the current byte in a string? - boolean, ## in_bs -- does the current byte follow a backslash in a string? - boolean, ## empty -- is the current object or array empty? - boolean, ## first -- is this the first object or array in the input? - opts - ) :: {iodata, pp_state} + ## input -- input data + iodata, + ## output_acc -- output iolist (built in reverse order) + iodata, + ## depth -- current nesting depth + non_neg_integer, + ## in_str -- is the current byte in a string? + boolean, + ## in_bs -- does the current byte follow a backslash in a string? + boolean, + ## empty -- is the current object or array empty? + boolean, + ## first -- is this the first object or array in the input? + boolean, + opts + ) :: {iodata, pp_state} defp pp_iodata(input, output_acc, depth, in_str, in_bs, empty, first, opts) defp pp_iodata("", output_acc, depth, in_str, in_bs, empty, first, opts) do @@ -169,78 +173,100 @@ defmodule Jason.Formatter do {output_acc, {depth, in_str, in_bs, empty, first, opts}} end - defp pp_iodata(<<byte::size(8), rest::binary>>, output_acc, depth, in_str, in_bs, empty, first, opts) do + defp pp_iodata( + <<byte::size(8), rest::binary>>, + output_acc, + depth, + in_str, + in_bs, + empty, + first, + opts + ) do pp_byte(byte, rest, output_acc, depth, in_str, in_bs, empty, first, opts) end - defp pp_iodata(byte, output_acc, depth, in_str, in_bs, empty, first, opts) when is_integer(byte) do + defp pp_iodata(byte, output_acc, depth, in_str, in_bs, empty, first, opts) + when is_integer(byte) do pp_byte(byte, [], output_acc, depth, in_str, in_bs, empty, first, opts) end defp pp_iodata(list, output_acc, depth, in_str, in_bs, empty, first, opts) when is_list(list) do starting_state = {depth, in_str, in_bs, empty, first, opts} - {reversed_output, end_state} = Enum.reduce list, {[], starting_state}, fn (item, {output_acc, state}) -> - {depth, in_str, in_bs, empty, first, opts} = state - {item_output, new_state} = pp_iodata(item, [], depth, in_str, in_bs, empty, first, opts) - {[output_acc, item_output], new_state} - end - {[output_acc, reversed_output], end_state} - end + {list_output, end_state} = + Enum.reduce(list, {[], starting_state}, fn item, {output_acc, state} -> + {depth, in_str, in_bs, empty, first, opts} = state + {item_output, new_state} = pp_iodata(item, [], depth, in_str, in_bs, empty, first, opts) + {[output_acc, item_output], new_state} + end) + + {[output_acc, list_output], end_state} + end @spec pp_byte( - byte, ## byte -- current byte - iodata, ## rest -- rest of input data - iodata, ## output -- output iolist (built in reverse order) - non_neg_integer, ## depth -- current nesting depth - boolean, ## in_str -- is the current byte in a string? - boolean, ## in_bs -- does the current byte follow a backslash in a string? - boolean, ## empty -- is the current object or array empty? - boolean, ## first -- is this the first object or array in the input? - opts - ) :: {iodata, pp_state} + ## byte -- current byte + byte, + ## rest -- rest of input data + iodata, + ## output -- output iolist (built in reverse order) + iodata, + ## depth -- current nesting depth + non_neg_integer, + ## in_str -- is the current byte in a string? + boolean, + ## in_bs -- does the current byte follow a backslash in a string? + boolean, + ## empty -- is the current object or array empty? + boolean, + ## first -- is this the first object or array in the input? + boolean, + opts + ) :: {iodata, pp_state} defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts) ## in string, following backslash - defp pp_byte(byte, rest, output, depth, true=in_str, true=_in_bs, empty, first, opts) do + defp pp_byte(byte, rest, output, depth, true = in_str, true = _in_bs, empty, first, opts) do in_bs = false pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts) end ## in string, backslash - defp pp_byte(byte, rest, output, depth, true=in_str, _in_bs, empty, first, opts) - when byte in '\\' do + defp pp_byte(byte, rest, output, depth, true = in_str, _in_bs, empty, first, opts) + when byte in '\\' do in_bs = true pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts) end ## in string, end quote - defp pp_byte(byte, rest, output, depth, true=_in_str, in_bs, empty, first, opts) - when byte in '"' do + defp pp_byte(byte, rest, output, depth, true = _in_str, in_bs, empty, first, opts) + when byte in '"' do in_str = false pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts) end ## in string, other character - defp pp_byte(byte, rest, output, depth, true=in_str, in_bs, empty, first, opts) do + defp pp_byte(byte, rest, output, depth, true = in_str, in_bs, empty, first, opts) do pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts) end ## out of string, whitespace defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts) - when byte in ' \n\r\t' do + when byte in ' \n\r\t' do pp_iodata(rest, output, depth, in_str, in_bs, empty, first, opts) end ## out of string, start block defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts) - when byte in '{[' do - out = cond do - first -> byte - empty -> [opts[:line_separator], tab(opts, depth), byte] - depth == 0 -> [opts[:record_separator], byte] - true -> byte - end + when byte in '{[' do + out = + cond do + first -> byte + empty -> [opts[:line_separator], tab(opts, depth), byte] + depth == 0 -> [opts[:record_separator], byte] + true -> byte + end + first = false empty = true depth = depth + 1 @@ -248,16 +274,16 @@ defmodule Jason.Formatter do end ## out of string, end empty block - defp pp_byte(byte, rest, output, depth, in_str, in_bs, true=_empty, first, opts) - when byte in '}]' do + defp pp_byte(byte, rest, output, depth, in_str, in_bs, true = _empty, first, opts) + when byte in '}]' do empty = false depth = depth - 1 pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts) end ## out of string, end non-empty block - defp pp_byte(byte, rest, output, depth, in_str, in_bs, false=empty, first, opts) - when byte in '}]' do + defp pp_byte(byte, rest, output, depth, in_str, in_bs, false = empty, first, opts) + when byte in '}]' do depth = depth - 1 out = [opts[:line_separator], tab(opts, depth), byte] pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) @@ -265,7 +291,7 @@ defmodule Jason.Formatter do ## out of string, comma defp pp_byte(byte, rest, output, depth, in_str, in_bs, _empty, first, opts) - when byte in ',' do + when byte in ',' do empty = false out = [byte, opts[:line_separator], tab(opts, depth)] pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) @@ -273,7 +299,7 @@ defmodule Jason.Formatter do ## out of string, colon defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts) - when byte in ':' do + when byte in ':' do out = [byte, opts[:after_colon]] pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) end @@ -286,4 +312,3 @@ defmodule Jason.Formatter do pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) end end - diff --git a/test/formatter_test.exs b/test/formatter_test.exs index 32feec7..c7a0a82 100644 --- a/test/formatter_test.exs +++ b/test/formatter_test.exs @@ -10,7 +10,7 @@ defmodule Jason.FormatterTest do "simple-object", "multiple-objects", "backslash-string", - "empty-nest", + "empty-nest" ] for name <- @test_cases do @@ -65,4 +65,3 @@ defmodule Jason.FormatterTest do assert(pretty_print(input, indent: "\t") == output) end end - From d0c869d584503d598a69b48ad1d6c1f38fcd5736 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Muska=C5=82a?= <michal@muskala.eu> Date: Mon, 28 May 2018 16:55:52 +0200 Subject: [PATCH 5/7] Use record for options in formatter --- lib/formatter.ex | 65 ++++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/lib/formatter.ex b/lib/formatter.ex index 9e6a438..efb2d7b 100644 --- a/lib/formatter.ex +++ b/lib/formatter.ex @@ -16,6 +16,9 @@ defmodule Jason.Formatter do | {:after_colon, iodata} ] + import Record + defrecordp :opts, [:indent, :line, :record, :colon] + @doc ~S""" Returns a binary containing a pretty-printed representation of JSON-encoded `iodata`. @@ -61,12 +64,14 @@ defmodule Jason.Formatter do """ @spec pretty_print_to_iodata(iodata, opts) :: iodata def pretty_print_to_iodata(iodata, opts \\ []) do + opts = parse_opts(opts, opts(indent: " ", line: "\n", record: nil, colon: " ")) + opts = opts(opts, record: opts(opts, :record) || opts(opts, :line)) + depth = 0 in_str = false in_bs = false empty = false first = true - opts = normalize_opts(opts) {output, _state} = pp_iodata(iodata, [], depth, in_str, in_bs, empty, first, opts) @@ -103,36 +108,36 @@ defmodule Jason.Formatter do """ @spec minimize_to_iodata(iodata, opts) :: iodata def minimize_to_iodata(iodata, opts) do - pretty_print_to_iodata( - iodata, - indent: "", - line_separator: "", - record_separator: opts[:record_separator] || "\n", - after_colon: "" - ) + opts = parse_opts(opts, opts(indent: [], line: [], record: "\n", colon: [])) + + depth = 0 + in_str = false + in_bs = false + empty = false + first = true + + {output, _state} = pp_iodata(iodata, [], depth, in_str, in_bs, empty, first, opts) + + output end - ## Returns a copy of `opts` with defaults applied - @spec normalize_opts(keyword) :: opts - defp normalize_opts(opts) do - [ - indent: opts[:indent] || " ", - line_separator: opts[:line_separator] || "\n", - record_separator: opts[:record_separator] || opts[:line_separator] || "\n", - after_colon: opts[:after_colon] || " " - ] + defp parse_opts(opts, defaults) do + Enum.reduce(opts, defaults, fn + {:indent, indent}, opts -> opts(opts, indent: indent) + {:line_separator, line}, opts -> opts(opts, line: line, record: opts(opts, :record) || line) + {:record_separator, record}, opts -> opts(opts, record: record) + {:after_colon, colon}, opts -> opts(opts, colon: colon) + end) end ## Returns an iolist containing `depth` instances of `opts[:indent]` - @spec tab(opts, non_neg_integer, iolist) :: iolist - defp tab(opts, depth, output \\ []) do - if depth < 1 do - output - else - tab(opts, depth - 1, [opts[:indent] | output]) - end + for depth <- 1..16 do + defp tab(" ", unquote(depth)), do: unquote(String.duplicate(" ", depth)) end + defp tab([], _), do: "" + defp tab(indent, depth), do: List.duplicate(indent, depth) + @typep pp_state :: { ## depth -- current nesting depth non_neg_integer, @@ -262,8 +267,8 @@ defmodule Jason.Formatter do out = cond do first -> byte - empty -> [opts[:line_separator], tab(opts, depth), byte] - depth == 0 -> [opts[:record_separator], byte] + empty -> [opts(opts, :line), tab(opts(opts, :indent), depth), byte] + depth == 0 -> [opts(opts, :record), byte] true -> byte end @@ -285,7 +290,7 @@ defmodule Jason.Formatter do defp pp_byte(byte, rest, output, depth, in_str, in_bs, false = empty, first, opts) when byte in '}]' do depth = depth - 1 - out = [opts[:line_separator], tab(opts, depth), byte] + out = [opts(opts, :line), tab(opts(opts, :indent), depth), byte] pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) end @@ -293,20 +298,20 @@ defmodule Jason.Formatter do defp pp_byte(byte, rest, output, depth, in_str, in_bs, _empty, first, opts) when byte in ',' do empty = false - out = [byte, opts[:line_separator], tab(opts, depth)] + out = [byte, opts(opts, :line), tab(opts(opts, :indent), depth)] pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) end ## out of string, colon defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts) when byte in ':' do - out = [byte, opts[:after_colon]] + out = [byte, opts(opts, :colon)] pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) end ## out of string, other character (maybe start quote) defp pp_byte(byte, rest, output, depth, _in_str, in_bs, empty, first, opts) do - out = if empty, do: [opts[:line_separator], tab(opts, depth), byte], else: byte + out = if empty, do: [opts(opts, :line), tab(opts(opts, :indent), depth), byte], else: byte in_str = byte in '"' empty = false pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) From e65827aa763d85abad0e571f83214e9876b00124 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Muska=C5=82a?= <michal@muskala.eu> Date: Mon, 2 Jul 2018 19:08:58 +0200 Subject: [PATCH 6/7] Refactor formatter, extract string to separate function, limit state --- formatter_test_suite/simple-object.min.json~ | 1 - lib/formatter.ex | 273 ++++++++----------- test/formatter_test.exs | 34 +++ 3 files changed, 147 insertions(+), 161 deletions(-) delete mode 100644 formatter_test_suite/simple-object.min.json~ diff --git a/formatter_test_suite/simple-object.min.json~ b/formatter_test_suite/simple-object.min.json~ deleted file mode 100644 index 8b13789..0000000 --- a/formatter_test_suite/simple-object.min.json~ +++ /dev/null @@ -1 +0,0 @@ - diff --git a/lib/formatter.ex b/lib/formatter.ex index efb2d7b..348b4c8 100644 --- a/lib/formatter.ex +++ b/lib/formatter.ex @@ -52,7 +52,8 @@ defmodule Jason.Formatter do """ @spec pretty_print(iodata, opts) :: binary def pretty_print(iodata, opts \\ []) do - pretty_print_to_iodata(iodata, opts) + iodata + |> pretty_print_to_iodata(opts) |> IO.iodata_to_binary() end @@ -67,13 +68,10 @@ defmodule Jason.Formatter do opts = parse_opts(opts, opts(indent: " ", line: "\n", record: nil, colon: " ")) opts = opts(opts, record: opts(opts, :record) || opts(opts, :line)) - depth = 0 - in_str = false - in_bs = false + depth = :first empty = false - first = true - {output, _state} = pp_iodata(iodata, [], depth, in_str, in_bs, empty, first, opts) + {output, _state} = pp_iodata(iodata, [], depth, empty, opts) output end @@ -96,7 +94,8 @@ defmodule Jason.Formatter do """ @spec minimize(iodata, opts) :: binary def minimize(iodata, opts \\ []) do - minimize_to_iodata(iodata, opts) + iodata + |> minimize_to_iodata(opts) |> IO.iodata_to_binary() end @@ -108,212 +107,166 @@ defmodule Jason.Formatter do """ @spec minimize_to_iodata(iodata, opts) :: iodata def minimize_to_iodata(iodata, opts) do - opts = parse_opts(opts, opts(indent: [], line: [], record: "\n", colon: [])) + opts = parse_opts(opts, opts(indent: "", line: "", record: "\n", colon: "")) - depth = 0 - in_str = false - in_bs = false + depth = :first empty = false - first = true - {output, _state} = pp_iodata(iodata, [], depth, in_str, in_bs, empty, first, opts) + {output, _state} = pp_iodata(iodata, [], depth, empty, opts) output end defp parse_opts(opts, defaults) do Enum.reduce(opts, defaults, fn - {:indent, indent}, opts -> opts(opts, indent: indent) - {:line_separator, line}, opts -> opts(opts, line: line, record: opts(opts, :record) || line) - {:record_separator, record}, opts -> opts(opts, record: record) - {:after_colon, colon}, opts -> opts(opts, colon: colon) + {:indent, indent}, opts -> + opts(opts, indent: IO.iodata_to_binary(indent)) + + {:line_separator, line}, opts -> + line = IO.iodata_to_binary(line) + opts(opts, line: line, record: opts(opts, :record) || line) + + {:record_separator, record}, opts -> + opts(opts, record: IO.iodata_to_binary(record)) + + {:after_colon, colon}, opts -> + opts(opts, colon: IO.iodata_to_binary(colon)) end) end + @spec tab(String.t(), non_neg_integer) :: iodata() ## Returns an iolist containing `depth` instances of `opts[:indent]` for depth <- 1..16 do defp tab(" ", unquote(depth)), do: unquote(String.duplicate(" ", depth)) end - defp tab([], _), do: "" + defp tab("", _), do: "" defp tab(indent, depth), do: List.duplicate(indent, depth) - @typep pp_state :: { - ## depth -- current nesting depth - non_neg_integer, - ## in_str -- is the current byte in a string? - boolean, - ## in_bs -- does the current byte follow a backslash in a string? - boolean, - ## empty -- is the current object or array empty? - boolean, - ## first -- is this the first object or array in the input? - boolean - } - - @spec pp_iodata( - ## input -- input data - iodata, - ## output_acc -- output iolist (built in reverse order) - iodata, - ## depth -- current nesting depth - non_neg_integer, - ## in_str -- is the current byte in a string? - boolean, - ## in_bs -- does the current byte follow a backslash in a string? - boolean, - ## empty -- is the current object or array empty? - boolean, - ## first -- is this the first object or array in the input? - boolean, - opts - ) :: {iodata, pp_state} - defp pp_iodata(input, output_acc, depth, in_str, in_bs, empty, first, opts) - - defp pp_iodata("", output_acc, depth, in_str, in_bs, empty, first, opts) do - {output_acc, {depth, in_str, in_bs, empty, first, opts}} - end - - defp pp_iodata([], output_acc, depth, in_str, in_bs, empty, first, opts) do - {output_acc, {depth, in_str, in_bs, empty, first, opts}} + defp pp_iodata(<<>>, output_acc, depth, empty, opts) do + {output_acc, &pp_iodata(&1, &2, depth, empty, opts)} end - defp pp_iodata( - <<byte::size(8), rest::binary>>, - output_acc, - depth, - in_str, - in_bs, - empty, - first, - opts - ) do - pp_byte(byte, rest, output_acc, depth, in_str, in_bs, empty, first, opts) + defp pp_iodata(<<byte, rest::binary>>, output_acc, depth, empty, opts) do + pp_byte(byte, rest, output_acc, depth, empty, opts) end - defp pp_iodata(byte, output_acc, depth, in_str, in_bs, empty, first, opts) - when is_integer(byte) do - pp_byte(byte, [], output_acc, depth, in_str, in_bs, empty, first, opts) + defp pp_iodata([], output_acc, depth, empty, opts) do + {output_acc, &pp_iodata(&1, &2, depth, empty, opts)} end - defp pp_iodata(list, output_acc, depth, in_str, in_bs, empty, first, opts) when is_list(list) do - starting_state = {depth, in_str, in_bs, empty, first, opts} - - {list_output, end_state} = - Enum.reduce(list, {[], starting_state}, fn item, {output_acc, state} -> - {depth, in_str, in_bs, empty, first, opts} = state - {item_output, new_state} = pp_iodata(item, [], depth, in_str, in_bs, empty, first, opts) - {[output_acc, item_output], new_state} - end) - - {[output_acc, list_output], end_state} + defp pp_iodata([byte | rest], output_acc, depth, empty, opts) when is_integer(byte) do + pp_byte(byte, rest, output_acc, depth, empty, opts) end - @spec pp_byte( - ## byte -- current byte - byte, - ## rest -- rest of input data - iodata, - ## output -- output iolist (built in reverse order) - iodata, - ## depth -- current nesting depth - non_neg_integer, - ## in_str -- is the current byte in a string? - boolean, - ## in_bs -- does the current byte follow a backslash in a string? - boolean, - ## empty -- is the current object or array empty? - boolean, - ## first -- is this the first object or array in the input? - boolean, - opts - ) :: {iodata, pp_state} - defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts) - - ## in string, following backslash - defp pp_byte(byte, rest, output, depth, true = in_str, true = _in_bs, empty, first, opts) do - in_bs = false - pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts) + defp pp_iodata([head | tail], output_acc, depth, empty, opts) do + {output_acc, cont} = pp_iodata(head, output_acc, depth, empty, opts) + cont.(tail, output_acc) end - ## in string, backslash - defp pp_byte(byte, rest, output, depth, true = in_str, _in_bs, empty, first, opts) - when byte in '\\' do - in_bs = true - pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts) + defp pp_byte(byte, rest, output, depth, empty, opts) when byte in ' \n\r\t' do + pp_iodata(rest, output, depth, empty, opts) end - ## in string, end quote - defp pp_byte(byte, rest, output, depth, true = _in_str, in_bs, empty, first, opts) - when byte in '"' do - in_str = false - pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts) - end - - ## in string, other character - defp pp_byte(byte, rest, output, depth, true = in_str, in_bs, empty, first, opts) do - pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts) - end - - ## out of string, whitespace - defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts) - when byte in ' \n\r\t' do - pp_iodata(rest, output, depth, in_str, in_bs, empty, first, opts) - end - - ## out of string, start block - defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts) - when byte in '{[' do - out = + defp pp_byte(byte, rest, output, depth, empty, opts) when byte in '{[' do + {out, depth} = cond do - first -> byte - empty -> [opts(opts, :line), tab(opts(opts, :indent), depth), byte] - depth == 0 -> [opts(opts, :record), byte] - true -> byte + depth == :first -> {byte, 1} + depth == 0 -> {[opts(opts, :record), byte], 1} + empty -> {[opts(opts, :line), tab(opts(opts, :indent), depth), byte], depth + 1} + true -> {byte, depth + 1} end - first = false empty = true - depth = depth + 1 - pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) + pp_iodata(rest, [output, out], depth, empty, opts) end - ## out of string, end empty block - defp pp_byte(byte, rest, output, depth, in_str, in_bs, true = _empty, first, opts) - when byte in '}]' do + defp pp_byte(byte, rest, output, depth, true = _empty, opts) when byte in '}]' do empty = false depth = depth - 1 - pp_iodata(rest, [output, byte], depth, in_str, in_bs, empty, first, opts) + pp_iodata(rest, [output, byte], depth, empty, opts) end - ## out of string, end non-empty block - defp pp_byte(byte, rest, output, depth, in_str, in_bs, false = empty, first, opts) - when byte in '}]' do + defp pp_byte(byte, rest, output, depth, false = empty, opts) when byte in '}]' do depth = depth - 1 out = [opts(opts, :line), tab(opts(opts, :indent), depth), byte] - pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) + pp_iodata(rest, [output, out], depth, empty, opts) end - ## out of string, comma - defp pp_byte(byte, rest, output, depth, in_str, in_bs, _empty, first, opts) - when byte in ',' do + defp pp_byte(byte, rest, output, depth, _empty, opts) when byte in ',' do empty = false out = [byte, opts(opts, :line), tab(opts(opts, :indent), depth)] - pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) + pp_iodata(rest, [output, out], depth, empty, opts) end - ## out of string, colon - defp pp_byte(byte, rest, output, depth, in_str, in_bs, empty, first, opts) - when byte in ':' do + defp pp_byte(byte, rest, output, depth, empty, opts) when byte in ':' do out = [byte, opts(opts, :colon)] - pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) + pp_iodata(rest, [output, out], depth, empty, opts) end - ## out of string, other character (maybe start quote) - defp pp_byte(byte, rest, output, depth, _in_str, in_bs, empty, first, opts) do + defp pp_byte(byte, rest, output, depth, empty, opts) do out = if empty, do: [opts(opts, :line), tab(opts(opts, :indent), depth), byte], else: byte - in_str = byte in '"' empty = false - pp_iodata(rest, [output, out], depth, in_str, in_bs, empty, first, opts) + + if byte == ?" do + pp_string(rest, [output, out], _in_bs = false, &pp_iodata(&1, &2, depth, empty, opts)) + else + pp_iodata(rest, [output, out], depth, empty, opts) + end + end + + defp pp_string(<<>>, output_acc, in_bs, cont) do + {output_acc, &pp_string(&1, &2, in_bs, cont)} + end + + defp pp_string(<<?", rest::binary>>, output_acc, true = _in_bs, cont) do + pp_string(rest, [output_acc, ?"], false, cont) + end + + defp pp_string(<<?", rest::binary>>, output_acc, false = _in_bs, cont) do + cont.(rest, [output_acc, ?"]) + end + + defp pp_string(<<byte>>, output_acc, in_bs, cont) do + in_bs = not in_bs and byte == ?\\ + {[output_acc, byte], &pp_string(&1, &2, in_bs, cont)} + end + + defp pp_string(binary, output_acc, _in_bs, cont) when is_binary(binary) do + size = byte_size(binary) + + case :binary.match(binary, "\"") do + :nomatch -> + skip = size - 2 + <<_::binary-size(skip), prev, last>> = binary + in_bs = not (prev == ?\\ and last == ?\\) or last == ?\\ + {[output_acc | binary], &pp_string(&1, &2, in_bs, cont)} + + {pos, 1} -> + {leading, tail} = :erlang.split_binary(binary, pos + 1) + output = [output_acc | leading] + + case :binary.at(binary, pos - 1) do + ?\\ -> pp_string(tail, output, false, cont) + _ -> cont.(tail, output) + end + end + end + + defp pp_string([], output_acc, in_bs, cont) do + {output_acc, &pp_string(&1, &2, in_bs, cont)} + end + + defp pp_string([byte | rest], output_acc, in_bs, cont) when is_integer(byte) do + cond do + in_bs -> pp_string(rest, [output_acc, byte], false, cont) + byte == ?" -> cont.(rest, [output_acc, byte]) + true -> pp_string(rest, [output_acc, byte], byte == ?\\, cont) + end + end + + defp pp_string([head | tail], output_acc, in_bs, cont) do + {output_acc, cont} = pp_string(head, output_acc, in_bs, cont) + cont.(tail, output_acc) end end diff --git a/test/formatter_test.exs b/test/formatter_test.exs index c7a0a82..35cf932 100644 --- a/test/formatter_test.exs +++ b/test/formatter_test.exs @@ -64,4 +64,38 @@ defmodule Jason.FormatterTest do output = ~s|{\n\t"a": {\n\t\t"b": [\n\t\t\ttrue,\n\t\t\tfalse\n\t\t]\n\t}\n}| assert(pretty_print(input, indent: "\t") == output) end + + test "proper string escaping" do + input = ["\"abc\\\\", "\""] + output = ~S|"abc\\"| + assert(minimize(input) == output) + + input = ["\"abc\\\\", ?"] + output = ~S|"abc\\"| + assert(minimize(input) == output) + + input = ["\"abc\\\"", "\""] + output = ~S|"abc\""| + assert(minimize(input) == output) + + input = ["\"abc\\\"", ?"] + output = ~S|"abc\""| + assert(minimize(input) == output) + + input = ["\"abc\\", "\"\""] + output = ~S|"abc\""| + assert(minimize(input) == output) + + input = ["\"abc\\", ?", ?"] + output = ~S|"abc\""| + assert(minimize(input) == output) + + input = ["\"abc", "\\", ?", ?"] + output = ~S|"abc\""| + assert(minimize(input) == output) + + input = ["\"abc\\", "\\", ?"] + output = ~S|"abc\\"| + assert(minimize(input) == output) + end end From 6acd396e37f99b48031267f950e275f2c62792b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Muska=C5=82a?= <michal@muskala.eu> Date: Mon, 2 Jul 2018 19:09:23 +0200 Subject: [PATCH 7/7] Support Jason.encode!(data, pretty: true) --- lib/jason.ex | 37 ++++++++++++++++++++++++++++++------- test/encode_test.exs | 4 ++++ test/property_test.exs | 6 ++++++ 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/lib/jason.ex b/lib/jason.ex index ca6f1e6..43bda03 100644 --- a/lib/jason.ex +++ b/lib/jason.ex @@ -3,10 +3,12 @@ defmodule Jason do A blazing fast JSON parser and generator in pure Elixir. """ + alias Jason.{Encode, Decoder, DecodeError, EncodeError, Formatter} + @type escape :: :json | :unicode_safe | :html_safe | :javascript_safe @type maps :: :naive | :strict - @type encode_opt :: {:escape, escape} | {:maps, maps} + @type encode_opt :: {:escape, escape} | {:maps, maps} | {:pretty, true | Formatter.opts()} @type keys :: :atoms | :atoms! | :strings | :copy | (String.t() -> term) @@ -14,8 +16,6 @@ defmodule Jason do @type decode_opt :: {:keys, keys} | {:strings, strings} - alias Jason.{Encode, Decoder, DecodeError, EncodeError} - @doc """ Parses a JSON value from `input` iodata. @@ -105,6 +105,11 @@ defmodule Jason do rejected, since both keys would be encoded to the string `"foo"`. * `:naive` (default) - does not perform the check. + * `:pretty` - controls pretty printing of the output. Possible values are: + + * `true` to pretty print with default configuration + * a keyword of options as specified by `Jason.Formatter.pretty_print/2`. + ## Examples iex> Jason.encode(%{a: 1}) @@ -117,7 +122,7 @@ defmodule Jason do @spec encode(term, [encode_opt]) :: {:ok, String.t()} | {:error, EncodeError.t() | Exception.t()} def encode(input, opts \\ []) do - case Encode.encode(input, format_encode_opts(opts)) do + case do_encode(input, format_encode_opts(opts)) do {:ok, result} -> {:ok, IO.iodata_to_binary(result)} {:error, error} -> {:error, error} end @@ -140,7 +145,7 @@ defmodule Jason do """ @spec encode!(term, [encode_opt]) :: String.t() | no_return def encode!(input, opts \\ []) do - case Encode.encode(input, format_encode_opts(opts)) do + case do_encode(input, format_encode_opts(opts)) do {:ok, result} -> IO.iodata_to_binary(result) {:error, error} -> raise error end @@ -168,7 +173,7 @@ defmodule Jason do @spec encode_to_iodata(term, [encode_opt]) :: {:ok, iodata} | {:error, EncodeError.t() | Exception.t()} def encode_to_iodata(input, opts \\ []) do - Encode.encode(input, format_encode_opts(opts)) + do_encode(input, format_encode_opts(opts)) end @doc """ @@ -189,12 +194,30 @@ defmodule Jason do """ @spec encode_to_iodata!(term, [encode_opt]) :: iodata | no_return def encode_to_iodata!(input, opts \\ []) do - case Encode.encode(input, format_encode_opts(opts)) do + case do_encode(input, format_encode_opts(opts)) do {:ok, result} -> result {:error, error} -> raise error end end + defp do_encode(input, %{pretty: true} = opts) do + case Encode.encode(input, opts) do + {:ok, encoded} -> {:ok, Formatter.pretty_print_to_iodata(encoded)} + other -> other + end + end + + defp do_encode(input, %{pretty: pretty} = opts) do + case Encode.encode(input, opts) do + {:ok, encoded} -> {:ok, Formatter.pretty_print_to_iodata(encoded, pretty)} + other -> other + end + end + + defp do_encode(input, opts) do + Encode.encode(input, opts) + end + defp format_encode_opts(opts) do Enum.into(opts, %{escape: :json, maps: :naive}) end diff --git a/test/encode_test.exs b/test/encode_test.exs index 039c14a..0533729 100644 --- a/test/encode_test.exs +++ b/test/encode_test.exs @@ -151,6 +151,10 @@ defmodule Jason.EncoderTest do assert {:error, %Protocol.UndefinedError{}} = Jason.encode(self()) end + test "pretty: true" do + assert to_json(%{a: 3.14159, b: 1}, pretty: true) == ~s|{\n "a": 3.14159,\n "b": 1\n}| + end + defp to_json(value, opts \\ []) do Jason.encode!(value, opts) end diff --git a/test/property_test.exs b/test/property_test.exs index 6426301..0ba6a80 100644 --- a/test/property_test.exs +++ b/test/property_test.exs @@ -33,6 +33,12 @@ if Code.ensure_loaded?(ExUnitProperties) do end end + property "pretty roundtrip" do + check all json <- json(string(:printable)) do + assert decode(encode(json, pretty: true)) == json + end + end + property "unicode escaping" do check all string <- string(:printable) do encoded = encode(string, escape: :unicode)