Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix incorrect handling of multi-byte codepoints split by chunk boundaries #15

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 24 additions & 15 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,17 @@ function RespModifier (opts) {
res.end = end;
}

// Raw Buffer chunks handed to res.push, kept in arrival order. res.end
// concatenates them and decodes the result in one pass, so a multi-byte
// UTF-8 sequence split across two chunks is decoded as a single codepoint.
var chunks = [];

/**
 * Records one chunk of the response body.
 *
 * The chunk is stored as a Buffer in `chunks`, and its UTF-8 decoding is also
 * appended to the running `res.data` string.
 *
 * @param {Buffer|string} chunk - Body fragment; anything that is not a Buffer
 *     is converted with Buffer.from(chunk, "utf8").
 */
res.push = function (chunk) {
// NOTE(review): in this flattened diff view the next line looks like the
// pre-change implementation that the patch removes — confirm against the real
// index.js. If both assignments to res.data ran, every chunk would be appended twice.
res.data = (res.data || "") + chunk;
// Back-compat if somebody was calling push from outside with a string
if(!(chunk instanceof Buffer)) {
chunk = Buffer.from(chunk, "utf8");
}

// res.data can contain incorrectly decoded split multi-byte codepoints
// (each chunk is decoded on its own here); the raw bytes are kept in
// `chunks` so the full body can be re-decoded later.
res.data = (res.data || "") + chunk.toString("utf8");
chunks.push(chunk);
};

res.write = function (string, encoding) {
Expand All @@ -106,10 +115,11 @@ function RespModifier (opts) {
}

if (string !== undefined) {
var body = string instanceof Buffer ? string.toString(encoding) : string;
// Convert arguments into a Buffer
var chunk = !(string instanceof Buffer) ? Buffer.from(string, encoding) : string;
// If this chunk appears to be valid, push onto the res.data stack
if (force || (utils.isHtml(body) || utils.isHtml(res.data))) {
res.push(body);
if (force || (utils.isHtml(chunk.toString("utf8")) || utils.isHtml(res.data))) {
res.push(chunk);
} else {
restore();
return write.call(res, string, encoding);
Expand Down Expand Up @@ -142,19 +152,18 @@ function RespModifier (opts) {

res.end = function (string, encoding) {

res.data = res.data || "";

if (typeof string === "string") {
res.data += string;
if (!runPatches) {
return end.call(res, string, encoding);
}

if (string instanceof Buffer) {
res.data += string.toString();
if (typeof string === "string") {
res.push(Buffer.from(string, encoding));
} else if (string instanceof Buffer) {
res.push(string);
}

if (!runPatches) {
return end.call(res, string, encoding);
}
// Reconstruct res.data from buffers stored in chunks, so it is decoded as a valid Unicode string
res.data = Buffer.concat(chunks).toString("utf8");

// Check if our body is HTML, and if it does not already have the snippet.
if (force || utils.isHtml(res.data) && !utils.snip(res.data)) {
Expand All @@ -163,9 +172,9 @@ function RespModifier (opts) {
runPatches = false;
}
if (res.data !== undefined && !res._header) {
res.setHeader("content-length", Buffer.byteLength(res.data, encoding));
res.setHeader("content-length", Buffer.byteLength(res.data, "utf8"));
}
end.call(res, res.data, encoding);
end.call(res, res.data, "utf8");
};
}
}
Expand Down
45 changes: 45 additions & 0 deletions test/rules.unicode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
var express = require("express");
var assert = require("chai").assert;
var request = require("supertest");
var respMod = require("..");

// Body containing several two-byte UTF-8 characters (Czech diacritics).
var inputString = "<body>příliš žluťoučký kůň</body>";

/**
 * Regression test: a response written in two chunks, where the chunk boundary
 * falls in the middle of a multi-byte UTF-8 character, must reach the client
 * with the original text intact.
 */
describe("Correctly decodes unicode characters split by chunk boundary", function () {

    var app, lr;

    before(function () {

        app = express();

        // The rule's match string does not occur in inputString, so the
        // expected response text is the input itself.
        lr = respMod.create({
            rules: [
                {
                    match: "something",
                    replace: "something else"
                }
            ]
        });

        app.use(lr.middleware);

        app.get("/", function (req, res) {
            var inputBuf = Buffer.from(inputString, "utf8");

            // "<body>p" occupies bytes 0-6 and "ř" is the two-byte sequence at
            // bytes 7-8, so cutting at offset 8 splits in the middle of ř.
            var splitAt = 8;

            res.write(inputBuf.subarray(0, splitAt));
            res.end(inputBuf.subarray(splitAt));
        });
    });

    it("should correctly handle split unicode character", function (done) {

        request(app)
            .get("/")
            .set("Accept", "text/html")
            .end(function (err, res) {
                // Report request failures as such, rather than letting them
                // surface as a TypeError on a missing response object.
                if (err) {
                    return done(err);
                }
                assert.equal(res.text, inputString);
                done();
            });
    });
});