shakyShane · MrVoltz · Oct 4, 2023 · Oct 4, 2023
diff --git a/index.js b/index.js
@@ -95,8 +95,17 @@ function RespModifier (opts) {
                 res.end = end;
             }
 
+            var chunks = [];
+
             res.push = function (chunk) {
-                res.data = (res.data || "") + chunk;
+                // Back-compat if somebody was calling push from outside with a string
+                if(!(chunk instanceof Buffer)) {
+                    chunk = Buffer.from(chunk, "utf8");
+                }
+
+                // res.data can contain incorrectly decoded split multi-byte codepoints
+                res.data = (res.data || "") + chunk.toString("utf8");
+                chunks.push(chunk);
             };
 
             res.write = function (string, encoding) {
@@ -106,10 +115,11 @@ function RespModifier (opts) {
                 }
 
                 if (string !== undefined) {
-                    var body = string instanceof Buffer ? string.toString(encoding) : string;
+                    // Convert arguments into a Buffer
+                    var chunk = !(string instanceof Buffer) ? Buffer.from(string, encoding) : string;
                     // If this chunk appears to be valid, push onto the res.data stack
-                    if (force || (utils.isHtml(body) || utils.isHtml(res.data))) {
-                        res.push(body);
+                    if (force || (utils.isHtml(chunk.toString("utf8")) || utils.isHtml(res.data))) {
+                        res.push(chunk);
                     } else {
                         restore();
                         return write.call(res, string, encoding);
@@ -142,19 +152,18 @@ function RespModifier (opts) {
 
             res.end = function (string, encoding) {
 
-                res.data = res.data || "";
-
-                if (typeof string === "string") {
-                    res.data += string;
+                if (!runPatches) {
+                    return end.call(res, string, encoding);
                 }
 
-                if (string instanceof Buffer) {
-                    res.data += string.toString();
+                if (typeof string === "string") {
+                    res.push(Buffer.from(string, encoding));
+                } else if (string instanceof Buffer) {
+                    res.push(string);
                 }
 
-                if (!runPatches) {
-                    return end.call(res, string, encoding);
-                }
+                // Reconstruct res.data from buffers stored in chunks, so it is decoded as a valid Unicode string
+                res.data = Buffer.concat(chunks).toString("utf8");
 
                 // Check if our body is HTML, and if it does not already have the snippet.
                 if (force || utils.isHtml(res.data) && !utils.snip(res.data)) {
@@ -163,9 +172,9 @@ function RespModifier (opts) {
                     runPatches = false;
                 }
                 if (res.data !== undefined && !res._header) {
-                    res.setHeader("content-length", Buffer.byteLength(res.data, encoding));
+                    res.setHeader("content-length", Buffer.byteLength(res.data, "utf8"));
                 }
-                end.call(res, res.data, encoding);
+                end.call(res, res.data, "utf8");
             };
         }
     }

diff --git a/test/rules.unicode.js b/test/rules.unicode.js
@@ -0,0 +1,45 @@
+var express = require("express");
+var assert = require("chai").assert;
+var request = require("supertest");
+var respMod = require("..");
+
+var inputString = "<body>příliš žluťoučký kůň</body>";
+
+// Regression test: a multi-byte UTF-8 character split across two response
+// chunks must reach the client intact after passing through the middleware.
+describe("Correctly decodes unicode characters split by chunk boundary", function () {
+    var app, lr;
+
+    before(function () {
+        app = express();
+
+        lr = respMod.create({
+            rules: [
+                {
+                    match: "something",
+                    replace: "something else"
+                }
+            ]
+        });
+        app.use(lr.middleware);
+
+        app.get("/", function (req, res) {
+            var inputBuf = Buffer.from(inputString, "utf8");
+            // "<body>p" is 7 bytes and "ř" is 2, so cutting at byte 8 splits "ř" in half
+            res.write(inputBuf.subarray(0, 8));
+            res.end(inputBuf.subarray(8));
+        });
+    });
+
+    it("should correctly handle split unicode character", function (done) {
+        request(app)
+            .get("/")
+            .set("Accept", "text/html")
+            .end(function (err, res) {
+                // Hand request failures to the test runner instead of asserting on a possibly missing res
+                if (err) { return done(err); }
+                assert.equal(res.text, inputString);
+                done();
+            });
+    });
+});