From d23a8ab76bcd4bf289d23ece458996cd513aec5c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Edgar=20Ram=C3=ADrez-Mondrag=C3=B3n?=
Date: Fri, 6 Sep 2024 12:55:42 -0600
Subject: [PATCH] Improve JSONL binary serialization performance

---
Review amendment: serialize_jsonl() now returns an immutable copy instead of
the shared module-level buffer, and the added code carries explanatory
comments. The hunk line counts and the diffstat below were updated to match;
the post-image blob id on the index line still refers to the original
submission.

 singer_sdk/_singerlib/encoding/_msgspec.py | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/singer_sdk/_singerlib/encoding/_msgspec.py b/singer_sdk/_singerlib/encoding/_msgspec.py
index ed8a9f8362..70a5fe8084 100644
--- a/singer_sdk/_singerlib/encoding/_msgspec.py
+++ b/singer_sdk/_singerlib/encoding/_msgspec.py
@@ -43,6 +43,26 @@ def dec_hook(type: type, obj: t.Any) -> t.Any:  # noqa: ARG001, A002, ANN401
 
 encoder = msgspec.json.Encoder(enc_hook=enc_hook, decimal_format="number")
 decoder = msgspec.json.Decoder(dec_hook=dec_hook, float_hook=decimal.Decimal)
+# Scratch buffer reused by serialize_jsonl() on every call, so the encoder
+# writes into existing memory instead of building a new object per message.
+_jsonl_msg_buffer = bytearray(64)
+
+
+def serialize_jsonl(obj: object, **kwargs: t.Any) -> bytes:  # noqa: ARG001
+    """Serialize an object into a single newline-terminated line of JSON.
+
+    Args:
+        obj: A Python object, usually a dict.
+        **kwargs: Optional keyword arguments; currently unused.
+
+    Returns:
+        The serialized JSON followed by a newline, as immutable bytes.
+    """
+    encoder.encode_into(obj, _jsonl_msg_buffer)
+    _jsonl_msg_buffer.extend(b"\n")
+    # Copy out of the shared buffer: returning the bytearray itself would hand
+    # every caller the same object, which the next call overwrites.
+    return bytes(_jsonl_msg_buffer)
 
 
 class MsgSpecReader(GenericSingerReader[str]):
@@ -82,7 +102,7 @@ def serialize_message(self, message: Message) -> bytes:  # noqa: PLR6301
         Returns:
             A string of serialized json.
         """
-        return encoder.encode(message.to_dict())
+        return serialize_jsonl(message.to_dict())
 
     def write_message(self, message: Message) -> None:
         """Write a message to stdout.