Agoric · warner · Sep 20, 2020 · Sep 19, 2020 · Sep 19, 2020 · kriskowal
@@ -0,0 +1,92 @@
+// adapted from 'netstring-stream', https://github.com/tlivings/netstring-stream/
+const { Transform } = require('stream');
+
+// ASCII byte values of the two netstring delimiters: ':' ends the decimal
+// length prefix and ',' terminates the payload.
+const COLON = 58;
+const COMMA = 44;
+
+/**
+ * Frame a payload as a netstring: `<byte length>:<payload>,`.
+ *
+ * @param {Buffer} data payload bytes; its `length` becomes the decimal prefix
+ * @returns {Buffer} a new Buffer holding prefix, payload and trailing comma
+ */
+export function encode(data) {
+  const header = Buffer.from(`${data.length}:`);
+  const trailer = Buffer.from(',');
+  const pieces = [header, data, trailer];
+  return Buffer.concat(pieces);
+}
+
+// input is a sequence of Buffers (one per message), output is a byte pipe
+//
+// Returns a Transform whose writable side is in object mode: every Buffer
+// written to it is emitted on the readable side wrapped as one netstring.
+// A chunk that is not a Buffer, or a failure while encoding, is reported
+// through the transform callback so it surfaces as a stream error.
+export function encoderStream() {
+  function transform(chunk, encoding, callback) {
+    if (!Buffer.isBuffer(chunk)) {
+      // Hand the error to the stream rather than throwing: a throw from
+      // inside transform() escapes the stream's own error handling.
+      callback(Error('stream requires Buffers'));
+      return;
+    }
+    let err;
+    try {
+      this.push(encode(chunk));
+    } catch (e) {
+      err = e;
+    }
+    callback(err);
+  }
+  return new Transform({ transform, writableObjectMode: true });
+}
+
+// Input is a Buffer containing zero or more netstrings and maybe some
+// leftover bytes. Output is zero or more decoded Buffers, one per netstring,
+// plus a Buffer of leftover bytes.
+//
+// Returns `{ leftover, payloads }`. Both are produced with subarray(), so
+// they are views onto `data` rather than copies.
+//
+// Throws Error if a length prefix is not a plain decimal integer, or if a
+// complete payload is not followed by a comma. Input that merely ends early
+// (no colon yet, or not enough payload bytes yet) is not an error: it is
+// returned as `leftover` so the caller can retry once more bytes arrive.
+export function decode(data) {
+  // parseInt() on its own is too forgiving for a wire format: it ignores
+  // leading whitespace and trailing garbage ('1x' parses as 1) and accepts
+  // signs ('-0' parses as -0, which passes a "> -1" test). Require digits only.
+  const DECIMAL = /^[0-9]+$/;
+  let start = 0;
+  const payloads = [];
+
+  for (;;) {
+    const colon = data.indexOf(COLON, start);
+    if (colon === -1) {
+      break; // still waiting for `${LENGTH}:`
+    }
+    const sizeString = data.toString('utf-8', start, colon);
+    const size = DECIMAL.test(sizeString) ? parseInt(sizeString, 10) : NaN;
+    if (!Number.isSafeInteger(size)) {
+      // rejects empty and non-numeric prefixes, signs, whitespace, and
+      // lengths too large to be represented exactly
+      throw Error(`unparseable size '${sizeString}', should be integer`);
+    }
+    if (data.length < colon + 1 + size + 1) {
+      break; // still waiting for `${DATA},`
+    }
+    if (data[colon + 1 + size] !== COMMA) {
+      throw Error(`malformed netstring: not terminated by comma`);
+    }
+    payloads.push(data.subarray(colon + 1, colon + 1 + size));
+    // skip past the comma to the start of the next length prefix
+    start = colon + 1 + size + 1;
+  }
+
+  const leftover = data.subarray(start);
+  return { leftover, payloads };
+}
+
+// input is a byte pipe, output is a sequence of Buffers
+//
+// Returns a Transform whose readable side is in object mode and yields one
+// Buffer per complete netstring found in the bytes written so far. A chunk
+// that is not a Buffer, or a malformed netstring, is reported through the
+// transform callback so it surfaces as a stream error.
+//
+// NOTE(review): there is no flush handler, so bytes of an incomplete
+// netstring still buffered when the stream ends are dropped without an
+// error -- confirm that is the intended behavior.
+export function decoderStream() {
+  // bytes received so far that do not yet form a complete netstring
+  let buffered = Buffer.alloc(0);
+
+  function transform(chunk, encoding, callback) {
+    if (!Buffer.isBuffer(chunk)) {
+      // Hand the error to the stream rather than throwing: a throw from
+      // inside transform() escapes the stream's own error handling.
+      callback(Error('stream requires Buffers'));
+      return;
+    }
+    // TODO: it would be more efficient to accumulate pending chunks in an
+    // array, rather than doing a concat on every write
+    buffered = Buffer.concat([buffered, chunk]);
+    let err;
+    try {
+      const { leftover, payloads } = decode(buffered);
+      buffered = leftover;
+      for (let i = 0; i < payloads.length; i += 1) {
+        this.push(payloads[i]);
+      }
+    } catch (e) {
+      err = e;
+    }
+    // we buffer all data internally, to accommodate netstrings larger than
+    // Transform's default buffer size, and callback() indicates that we've
+    // consumed the input
+    callback(err);
+  }
+
+  return new Transform({ transform, readableObjectMode: true });
+}
@@ -0,0 +1,138 @@
+import '@agoric/install-ses'; // adds 'harden' to global
+
+import test from 'ava';
+import { encode, encoderStream, decode, decoderStream } from '../src/netstring';
+
+// Test fixtures: strings whose UTF-8 byte length differs from their
+// JavaScript (UTF-16) string length.
+const umlaut = 'ümlaut';
+const umlautBuffer = Buffer.from(umlaut, 'utf-8');
+// the following string may not render in your editor, but it contains four
+// emoji glued together, which is frequently rendered as a single glyph.
+const emoji = '👨‍👨‍👧‍👧';
+const emojiBuffer = Buffer.from(emoji, 'utf-8');
+// The code points in `emoji` are:
+//  U+1F468 "MAN"
+//  U+200D "ZERO WIDTH JOINER"
+//  U+1F468 "MAN"
+//  U+200D "ZERO WIDTH JOINER"
+//  U+1F467 "GIRL"
+//  U+200D "ZERO WIDTH JOINER"
+//  U+1F467 "GIRL"
+
+// The emoji are outside the BMP and take two UTF-16 code units each, while
+// the joiner takes only one. So JavaScript considers the string length to be
+// 2+1+2+1+2+1+2 = 11. The UTF-8 encoding needs four bytes for each emoji and
+// three for each joiner, so the Buffer length is 4+3+4+3+4+3+4 = 25.
+
+test('setup', t => {
+  // Confirm the fixtures have the string lengths and byte lengths that the
+  // remaining tests rely on.
+  const expectations = [
+    [umlaut.length, 6],
+    [umlautBuffer.length, 7],
+    [emoji.length, 11],
+    [emojiBuffer.length, 25],
+  ];
+  expectations.forEach(([actual, expected]) => {
+    t.is(actual, expected);
+  });
+});
+
+test('encode', t => {
+  // Encode `input` (string or Buffer) and compare the result against the
+  // expected netstring; both are printed on mismatch to ease debugging.
+  const check = (input, expected) => {
+    const want = Buffer.from(expected);
+    const got = encode(Buffer.from(input));
+    if (!got.equals(want)) {
+      console.log(`got : ${got}`);
+      console.log(`want: ${want}`);
+    }
+    t.deepEqual(got, want);
+  };
+
+  check('', '0:,');
+  check('a', '1:a,');
+  check('abc', '3:abc,');
+  // multi-byte payloads: the prefix must be the byte length
+  check(umlautBuffer, Buffer.from(`7:${umlaut},`, 'utf-8'));
+  check(emojiBuffer, Buffer.from(`25:${emoji},`, 'utf-8'));
+});
+
+test('encode stream', async t => {
+  const stream = encoderStream();
+  const received = [];
+  stream.on('data', data => received.push(data));
+
+  // every frame the stream should have emitted so far, one per write
+  const expectedFrames = [];
+  function assertOutput() {
+    t.deepEqual(Buffer.concat(received), Buffer.concat(expectedFrames));
+  }
+  // write one payload, then check the cumulative output of the stream
+  function writeAndCheck(payload, prefix) {
+    stream.write(payload);
+    const frame = Buffer.concat([
+      Buffer.from(prefix),
+      payload,
+      Buffer.from(','),
+    ]);
+    expectedFrames.push(frame);
+    assertOutput();
+  }
+
+  writeAndCheck(Buffer.from(''), '0:');
+  writeAndCheck(Buffer.from('hello'), '5:');
+  writeAndCheck(umlautBuffer, '7:');
+  writeAndCheck(emojiBuffer, '25:');
+
+  // ending the stream must not emit anything further
+  stream.end();
+  assertOutput();
+});
+
+test('decode', t => {
+  // Decode `input` (string or Buffer) and check both the complete payloads
+  // and the bytes left over for the next call.
+  function eq(input, expPayloads, expLeftover) {
+    // Wrap Buffer.from in an arrow: passing it to map() directly would also
+    // hand it the array index as the `encoding` argument.
+    const encPayloads = expPayloads.map(payload => Buffer.from(payload));
+    const encLeftover = Buffer.from(expLeftover);
+
+    const { payloads, leftover } = decode(Buffer.from(input));
+    t.deepEqual(payloads, encPayloads);
+    t.deepEqual(leftover, encLeftover);
+  }
+
+  eq('', [], '');
+  eq('0', [], '0');
+  eq('0:', [], '0:');
+  eq('0:,', [''], '');
+  eq('0:,1', [''], '1');
+  eq('0:,1:', [''], '1:');
+  eq('0:,1:a', [''], '1:a');
+  eq('0:,1:a,', ['', 'a'], '');
+
+  let expectedBuffer = Buffer.from(`7:${umlaut},`, 'utf-8');
+  eq(expectedBuffer, [umlaut], '');
+
+  expectedBuffer = Buffer.from(`25:${emoji},`, 'utf-8');
+  eq(expectedBuffer, [emoji], '');
+
+  // Assert that decoding `input` throws an Error with exactly `message`.
+  function bad(input, message) {
+    t.throws(() => decode(Buffer.from(input)), { message });
+  }
+
+  // A bare 'a' is not rejected: decode() only validates a length prefix once
+  // its terminating colon has arrived, so until then the bytes are returned
+  // as leftover.
+  bad('a:', `unparseable size 'a', should be integer`);
+  bad('1:ab', 'malformed netstring: not terminated by comma');
+});
+
+test('decode stream', async t => {
+  const d = decoderStream();
+  // feed the decoder a string as raw bytes
+  function write(s) {
+    d.write(Buffer.from(s));
+  }
+
+  // every message the decoder has emitted so far
+  const msgs = [];
+  d.on('data', msg => msgs.push(msg));
+
+  function eq(expectedMessages) {
+    // Wrap Buffer.from in an arrow: passing it to map() directly would also
+    // hand it the array index as the `encoding` argument.
+    t.deepEqual(
+      msgs,
+      expectedMessages.map(message => Buffer.from(message)),
+    );
+  }
+
+  // one netstring delivered a byte at a time
+  write('');
+  eq([]);
+  write('0');
+  eq([]);
+  write(':');
+  eq([]);
+  write(',');
+  eq(['']);
+
+  // netstrings split across writes
+  write('1:');
+  eq(['']);
+  write('a,2:ab');
+  eq(['', 'a']);
+  write(',');
+  eq(['', 'a', 'ab']);
+  // several netstrings in a single write
+  write('3:abc,4:abcd,5:abcde,');
+  eq(['', 'a', 'ab', 'abc', 'abcd', 'abcde']);
+});