Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix lists decoding #17

Merged
merged 4 commits into from
Nov 27, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions lib/ex_rlp.ex
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ defmodule ExRLP do
end

@doc """
Given an RLP-encoded string, returns a decoded RLP structure (which is an
array of RLP structures or binaries).
Given an RLP-encoded string, returns a decoded RPL structure (which is an array of RLP structures or binaries).

## Examples

Expand Down
106 changes: 70 additions & 36 deletions lib/ex_rlp/decode.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,83 +3,117 @@ defmodule ExRLP.Decode do

@spec decode(binary(), keyword()) :: ExRLP.t()
def decode(item, options \\ []) when is_binary(item) do
case item
|> unencode(Keyword.get(options, :encoding, :binary))
|> decode_item do
[res] -> res
[] -> nil
els -> els
end
item
|> unencode(Keyword.get(options, :encoding, :binary))
|> decode_item
end

@spec unencode(binary() | String.t(), atom()) :: binary()
@spec unencode(binary(), atom()) :: binary()
defp unencode(value, :binary), do: value
defp unencode(value, :hex), do: Base.decode16!(value, case: :lower)
defp unencode(value, :hex), do: decode_hex(value)

@spec decode_item(binary(), ExRLP.t()) :: ExRLP.t()
defp decode_item(rlp_binary, result \\ [])
defp decode_item(rlp_binary, result \\ nil)

# When we don't have any RLP left, return the accumulator
defp decode_item(<<>>, result) do
defp decode_item("", result) when is_list(result) do
Enum.reverse(result)
end

# Decodes the head represents an item to be added directly
# to the result.
defp decode_item("", result) do
result
end

defp decode_item(<<(<<prefix>>), tail::binary>>, result) when prefix < 128 do
decode_item(tail, [<<prefix>> | result])
new_item = <<prefix>>

new_result = add_new_item(result, new_item)

decode_item(tail, new_result)
end

# Decodes medium length-binary?
defp decode_item(<<(<<prefix>>), tail::binary>>, result) when prefix <= 183 do
{new_item, new_tail} = decode_medium_binary(prefix, tail, 128)

decode_item(new_tail, [new_item | result])
new_result = add_new_item(result, new_item)

decode_item(new_tail, new_result)
end

# Decodes long length-binary?
defp decode_item(<<(<<be_size_prefix>>), tail::binary>>, result) when be_size_prefix < 192 do
{new_item, new_tail} = decode_long_binary(be_size_prefix, tail, 183)

decode_item(new_tail, [new_item | result])
new_result = add_new_item(result, new_item)

decode_item(new_tail, new_result)
end

defp decode_item(<<(<<be_size_prefix>>), tail::binary>>, result) when be_size_prefix == 192 do
new_item = []

new_result = add_new_item(result, new_item)

decode_item(tail, new_result)
end

defp decode_item(<<(<<prefix>>), tail::binary>>, result) when prefix <= 247 do
{list, new_tail} = decode_medium_binary(prefix, tail, 192)

new_result = add_decoded_list(result, list)

decode_item(new_tail, new_result)
end

defp decode_item(<<(<<be_size_prefix>>), tail::binary>>, result) do
{list, new_tail} = decode_long_binary(be_size_prefix, tail, 247)

new_result = result |> add_decoded_list(list)

decode_item(new_tail, new_result)
end

# Decodes an empty list
defp decode_item(<<(<<be_size_prefix>>), tail::binary>>, result)
when be_size_prefix == 192 do
decode_item(tail, [[] | result])
@spec add_new_item(ExRLP.t(), ExRLP.t()) :: ExRLP.t()
def add_new_item(nil, new_item) do
new_item
end

defp decode_item(<<(<<prefix>>), tail::binary>>, result) do
{list_bin, new_tail} =
if prefix <= 247 do
decode_medium_binary(prefix, tail, 192)
else
decode_long_binary(prefix, tail, 247)
end
def add_new_item(result, new_item) do
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if this function is more clear than simply inline the list operation. Mostly, it obscures (in the code above) that this function is just a simple operation and nothing more complicated.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, I agree. But we may have nil in the result variable.
I can change it to

if is_nil(result) do
  new_item
else
  [new_item | result]
end

in every place where this function is used

[new_item | result]
end

@spec add_decoded_list(ExRLP.t(), binary()) :: ExRLP.t()
defp add_decoded_list(nil, rlp_list_binary) do
rlp_list_binary
|> decode_item([])
|> Enum.reverse()
end

next_list = decode_item(list_bin, [])
defp add_decoded_list(result, rlp_list_binary) do
list_items = decode_item(rlp_list_binary, [])

decode_item(new_tail, [next_list | result])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This does remove a tail-call optimization, right?

Copy link
Member Author

@ayrat555 ayrat555 Nov 27, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, it's moved to add_decoded_list function

[list_items | result]
end

@spec decode_medium_binary(integer(), binary(), integer()) :: {binary(), binary()}
defp decode_medium_binary(length_prefix, tail, prefix) do
item_length = length_prefix - prefix
<<item::binary-size(item_length), new_tail::binary()>> = tail
<<item::binary-size(item_length), new_tail::binary>> = tail

{item, new_tail}
end

@spec decode_long_binary(integer(), binary(), integer()) :: {binary(), binary()}
defp decode_long_binary(be_size_prefix, tail, prefix) do
be_size = be_size_prefix - prefix
<<be::binary-size(be_size), data::binary()>> = tail
<<be::binary-size(be_size), data::binary>> = tail

item_length = :binary.decode_unsigned(be)

<<item::binary-size(item_length), new_tail::binary()>> = data
<<item::binary-size(item_length), new_tail::binary>> = data
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would there be an option to extract this into a function head?
if you compile this with bin_opt_info flags you get informed that
NOT OPTIMIZED: sub binary is used or returned
you can read more about this here http://erlang.org/documentation/doc-6.0/doc/efficiency_guide/binaryhandling.html

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so because length is not constant

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what the warning suggest is that it could delay the binary creation if it was passed in a function call.
so instead of returning {item, new_tail} you would return data and pattern match to decompose


{item, new_tail}
end

@spec decode_hex(binary()) :: binary()
defp decode_hex(binary) do
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is used anywhere?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's used on line 13

Base.decode16!(binary, case: :lower)
end
end
208 changes: 207 additions & 1 deletion test/ex_rlp/decode_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ defmodule ExRLP.DecodeTest do
test_file_name
|> read_json_file
|> Enum.each(fn {test_name, %{"in" => expected_result, "out" => input}} ->
expected_result = expected_result |> normalize_data
expected_result = normalize_data(expected_result)

result =
input
Expand All @@ -21,5 +21,211 @@ defmodule ExRLP.DecodeTest do
"Test for #{test_name} failed, expected #{result} to equal to #{expected_result}"
end)
end

test "decodes long list" do
rlp_bin =
<<248, 167, 184, 65, 41, 156, 166, 172, 253, 53, 227, 215, 45, 139, 163, 209, 226, 182,
11, 85, 97, 213, 175, 82, 24, 235, 91, 193, 130, 4, 87, 105, 235, 66, 38, 145, 10, 48,
26, 202, 227, 179, 105, 255, 252, 74, 72, 153, 214, 176, 37, 49, 232, 159, 212, 254, 54,
162, 207, 13, 147, 96, 123, 164, 112, 181, 15, 120, 0, 184, 64, 253, 161, 207, 246, 116,
201, 12, 154, 25, 117, 57, 254, 61, 251, 83, 8, 106, 206, 100, 248, 62, 215, 198, 234,
190, 199, 65, 247, 243, 129, 204, 128, 62, 82, 171, 44, 213, 93, 85, 105, 188, 228, 52,
113, 7, 163, 16, 223, 213, 248, 138, 1, 12, 210, 255, 209, 0, 92, 164, 6, 241, 132, 40,
119, 160, 126, 150, 139, 186, 19, 182, 197, 14, 44, 76, 215, 242, 65, 204, 13, 100, 209,
172, 37, 199, 245, 149, 45, 242, 49, 172, 106, 43, 218, 142, 229, 214, 4, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0>>

expected_result = [
<<41, 156, 166, 172, 253, 53, 227, 215, 45, 139, 163, 209, 226, 182, 11, 85, 97, 213, 175,
82, 24, 235, 91, 193, 130, 4, 87, 105, 235, 66, 38, 145, 10, 48, 26, 202, 227, 179, 105,
255, 252, 74, 72, 153, 214, 176, 37, 49, 232, 159, 212, 254, 54, 162, 207, 13, 147, 96,
123, 164, 112, 181, 15, 120, 0>>,
<<253, 161, 207, 246, 116, 201, 12, 154, 25, 117, 57, 254, 61, 251, 83, 8, 106, 206, 100,
248, 62, 215, 198, 234, 190, 199, 65, 247, 243, 129, 204, 128, 62, 82, 171, 44, 213, 93,
85, 105, 188, 228, 52, 113, 7, 163, 16, 223, 213, 248, 138, 1, 12, 210, 255, 209, 0, 92,
164, 6, 241, 132, 40, 119>>,
<<126, 150, 139, 186, 19, 182, 197, 14, 44, 76, 215, 242, 65, 204, 13, 100, 209, 172, 37,
199, 245, 149, 45, 242, 49, 172, 106, 43, 218, 142, 229, 214>>,
<<4>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>,
<<0>>
]

result = Decode.decode(rlp_bin)

assert result == expected_result
end

test "decodes nested lists" do
expected_result =
[["key1", "val1"], ["key2", "val2"], ["key3", "val3"], ["key4", "val4"]]
|> normalize_data

input =
"ecca846b6579318476616c31ca846b6579328476616c32ca846b6579338476616c33ca846b6579348476616c34"

result =
input
|> Decode.decode(encoding: :hex)
|> normalize_decoded_data(expected_result)

assert result == expected_result
end
end
end