From 54dd66065acc2feba8421e0942ea8743218bf244 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sun, 2 Aug 2020 23:34:02 -0700 Subject: [PATCH 1/5] Use `bytes` object for concatenation --- distributed/protocol/serialize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distributed/protocol/serialize.py b/distributed/protocol/serialize.py index 8ebac2500f5..ab072aef21b 100644 --- a/distributed/protocol/serialize.py +++ b/distributed/protocol/serialize.py @@ -574,7 +574,7 @@ def _serialize_bytes(obj): @dask_deserialize.register((bytes, bytearray)) def _deserialize_bytes(header, frames): - return b"".join(frames) + return bytes().join(frames) @dask_serialize.register(memoryview) From 4b988cad1374fd2bc472fa6220dc66697880b928 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sun, 2 Aug 2020 23:41:25 -0700 Subject: [PATCH 2/5] Split `bytes`/`bytearray` serialization Handle these two separately to ensure we are creating the right types in each respective case. --- distributed/protocol/serialize.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/distributed/protocol/serialize.py b/distributed/protocol/serialize.py index ab072aef21b..f16c46b607b 100644 --- a/distributed/protocol/serialize.py +++ b/distributed/protocol/serialize.py @@ -564,19 +564,32 @@ def normalize_Serialized(o): return [o.header] + o.frames # for dask.base.tokenize -# Teach serialize how to handle bytestrings -@dask_serialize.register((bytes, bytearray)) +# Teach serialize how to handle bytes +@dask_serialize.register(bytes) def _serialize_bytes(obj): header = {} # no special metadata frames = [obj] return header, frames -@dask_deserialize.register((bytes, bytearray)) +# Teach serialize how to handle bytestrings +@dask_serialize.register(bytearray) +def _serialize_bytearray(obj): + header = {} # no special metadata + frames = [obj] + return header, frames + + +@dask_deserialize.register(bytes) def _deserialize_bytes(header, frames): return bytes().join(frames) +@dask_deserialize.register(bytearray) +def _deserialize_bytearray(header, frames): + return bytearray().join(frames) + + @dask_serialize.register(memoryview) def _serialize_memoryview(obj): if obj.format == "O": From 06e0b782c39c2b72b20e2383122c5678de656a5d Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sun, 2 Aug 2020 23:43:51 -0700 Subject: [PATCH 3/5] Add a fast path to deserialize `bytes`/`bytearray` --- distributed/protocol/serialize.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/distributed/protocol/serialize.py b/distributed/protocol/serialize.py index f16c46b607b..2ce546cea2e 100644 --- a/distributed/protocol/serialize.py +++ b/distributed/protocol/serialize.py @@ -582,12 +582,18 @@ def _serialize_bytearray(obj): @dask_deserialize.register(bytes) def _deserialize_bytes(header, frames): - return bytes().join(frames) + if len(frames) == 1 and isinstance(frames[0], bytes): + return frames[0] + else: + return bytes().join(frames) @dask_deserialize.register(bytearray) def _deserialize_bytearray(header, frames): - return bytearray().join(frames) + if len(frames) == 1 and isinstance(frames[0], bytearray): + return frames[0] + else: + return bytearray().join(frames) @dask_serialize.register(memoryview) From db085ec9b288fdf71fb1739e595c9c1e7274e027 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sun, 2 Aug 2020 23:45:55 -0700 Subject: [PATCH 4/5] Test `bytes`/`bytearray` type deserialization --- distributed/protocol/tests/test_serialize.py | 1 + 1 file changed, 1 insertion(+) diff --git a/distributed/protocol/tests/test_serialize.py b/distributed/protocol/tests/test_serialize.py index f11186e1c70..ae9c9572a57 100644 --- a/distributed/protocol/tests/test_serialize.py +++ b/distributed/protocol/tests/test_serialize.py @@ -69,6 +69,7 @@ def test_serialize_bytestrings(): header, frames = serialize(b) assert frames[0] is b bb = deserialize(header, frames) + assert type(bb) == type(b) assert bb == b From 33ad869d3ac3c078bfa5a5a6b6d2d2bd83c09e5e Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Mon, 3 Aug 2020 00:02:44 -0700 Subject: [PATCH 5/5] Test deserializing other types and multiple frames Make sure that `bytes` and `bytearray` types are deserialized correctly even if the frames are of a different type or more frames are involved. --- distributed/protocol/tests/test_serialize.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/distributed/protocol/tests/test_serialize.py b/distributed/protocol/tests/test_serialize.py index ae9c9572a57..5ebe615d4e9 100644 --- a/distributed/protocol/tests/test_serialize.py +++ b/distributed/protocol/tests/test_serialize.py @@ -71,6 +71,12 @@ def test_serialize_bytestrings(): bb = deserialize(header, frames) assert type(bb) == type(b) assert bb == b + bb = deserialize(header, list(map(memoryview, frames))) + assert type(bb) == type(b) + assert bb == b + bb = deserialize(header, [b"", *frames]) + assert type(bb) == type(b) + assert bb == b def test_Serialize():