From 757666cbeefc762623e5a1757ce1fa9340a7ffb2 Mon Sep 17 00:00:00 2001 From: William Chargin Date: Sat, 11 Aug 2018 11:23:42 -0700 Subject: [PATCH 1/2] path: document and check normalizeString invariant There is an `if`-statement in `normalizeString` (a helper function for `path.normalize`) whose `else`-branch is never taken. This patch adds a runtime assertion to document this fact to others reading the code. The remainder of this commit message contains a proof that the branch is in fact never taken. The function `normalizeString` is called in four places (on lines 284, 377, 1110, and 1137). At each call site, we may observe that: - the `separator` argument is either "/" or "\\", and - the `isPathSeparator` argument is a function that returns `true` when given the code of `separator` and returns `true` when given `CHAR_FORWARD_SLASH`. We will therefore take these as preconditions on the function, which is valid because the function is module-private. Below is the source code of the function `normalizeString`, annotated with proof terms. Of particular import is the line marked "CLAIM(*)". ``` > // Resolves . and .. elements in a path with directory names PRECONDITION(A): `separator` must be "/" or "\\". PRECONDITION(B): `isPathSeparator` must return `true` given `separator.charCodeAt(0)`. PRECONDITION(C): `isPathSeparator` must return `true` given `CHAR_FORWARD_SLASH`. > function normalizeString(path, allowAboveRoot, separator, isPathSeparator) { INVARIANT(D): `res` does not end with `separator`. Proof: By induction, at initialization and at every assignment to `res`. The base case holds because `res` is empty and `separator` is not, by PRECONDITION(A). Assignments will be justified inline. INVARIANT(E): `res` does not contain two consecutive separators. Proof: By induction, at initialization and at every assignment to `res`. The base case is immediate. Assignments will be justified inline. > var res = ''; var lastSegmentLength = 0; INVARIANT(F): `lastSlash` is always an integer, and `i` is always an integer. Proof: By induction. The initial values of each are integers. The only assignment to `i` is to increment it (`++i` in the declaration), which preserves integrality. The only reassignment to `lastSlash` is to assign it the value of `i`, which is known by induction to be an integer. INVARIANT(G): Once the loop index `i` is initialized, it holds that `lastSlash <= i`. Proof: By induction, at initialization of `i` and at every assignment to `i` or `lastSlash`. The base case is clear: `i` is initialized to `0`, at which point `lastSlash` is `-1`. The only assignment to `i` is `++i`, which preserves the invariant. The only assignments to `lastSlash` are to set its value to `i`, which also preserve the invariant. > var lastSlash = -1; > var dots = 0; > var code; INVARIANT(H): Loop invariant: `path.slice(lastSlash + 1, i)` does not contain a `separator` (once `i` has been initialized). We refer to this expression as "the slice". Proof: By induction: at initialization of `i`, and at every assignment to `lastSlash`, `i`, or `path`. The base case is clear: initially, the slice has domain `(0, 0)`, so is empty. Assignments will be justified inline. LEMMA(I): If `lastSlash` is assigned the value `i` and neither `lastSlash` nor `i` nor `path` is modified before the next iteration of the loop, then INVARIANT(H) is preserved both (a) at the assignment and (b) at the iteration boundary. Proof: At the assignment, the slice has domain `(i + 1, i)`, so is empty. After `++i`, the slice has domain `(i + 1, i + 1)`, which is still empty. The empty string does not contain a `separator`, because `separator` is non-empty by PRECONDITION(A). This is sufficient to maintain the INVARIANT(H). INVARIANT(J): At the top of the loop, `lastSlash < i`. Proof: By cases on the iteration of the loop. For the first iteration of the loop, `lastSlash === -1` and `i === 0`. For subsequent iterations, note that INVARIANT(G) held at the bottom of the previous iteration of the loop, before `i` was incremented: that is, the previous value of `lastSlash` was less than or equal to the previous value of `i`. Since then, `lastSlash` has not been reassigned, and `i` has been incremented, so it follows that `lastSlash <= i - 1`, and therefore `lastSlash < i`. > for (var i = 0; i <= path.length; ++i) { > if (i < path.length) > code = path.charCodeAt(i); > else if (isPathSeparator(code)) > break; > else > code = CHAR_FORWARD_SLASH; > > if (isPathSeparator(code)) { > if (lastSlash === i - 1 || dots === 1) { > // NOOP > } else if (lastSlash !== i - 1 && dots === 2) { > if (res.length < 2 || lastSegmentLength !== 2 || > res.charCodeAt(res.length - 1) !== CHAR_DOT || > res.charCodeAt(res.length - 2) !== CHAR_DOT) { > if (res.length > 2) { > const lastSlashIndex = res.lastIndexOf(separator); > if (lastSlashIndex !== res.length - 1) { > if (lastSlashIndex === -1) { JUSTIFICATION: This assignment trivially preserves INVARIANT(D). JUSTIFICATION: This assignment trivially preserves INVARIANT(E). > res = ''; > lastSegmentLength = 0; > } else { JUSTIFICATION: This assignment preserves INVARIANT(D): - By control flow, we know that `lastSlashIndex !== -1`. - By definition of `lastIndexOf`, this means that `res` contains a `separator` at index `lastSlashIndex`. - By INVARIANT(E), `res` does not contain two consecutive `separator`s. Therefore, `res` does not contain a `separator` at index `lastSlashIndex - 1`. - Therefore, the new value for `res` does not contain a `separator` at index `lastSlashIndex - 1`, so it does not end with a `separator`. JUSTIFICATION: This assignment preserves INVARIANT(E). By INVARIANT(E), we know that `res` does not contain two consecutive `separator`s. It is immediate that no slice of `res` contains two consecutive `separator`s. > res = res.slice(0, lastSlashIndex); > lastSegmentLength = res.length - 1 - res.lastIndexOf(separator); > } JUSTIFICATION: This assignment preserves INVARIANT(H), by LEMMA(I). > lastSlash = i; > dots = 0; JUSTIFICATION: This loop boundary preserves INVARIANT(H), by LEMMA(I). > continue; > } else { CLAIM(*): This branch is not reachable. Proof: INVARIANT(D) indicates that `res` does not end in a `separator`, which means that `lastSlashIndex !== res.length - 1`, which is the guard for the enclosing `if`-statement. > throw new ERR_ASSERTION( > 'invariant violation: unreachable: ' + > JSON.stringify({ path, allowAboveRoot, separator }) > ); > } > } else if (res.length === 2 || res.length === 1) { JUSTIFICATION: This assignment trivially preserves INVARIANT(D). JUSTIFICATION: This assignment trivially preserves INVARIANT(E). > res = ''; > lastSegmentLength = 0; JUSTIFICATION: This assignment preserves INVARIANT(H), by LEMMA(I). > lastSlash = i; > dots = 0; JUSTIFICATION: This loop boundary preserves INVARIANT(H), by LEMMA(I). > continue; > } > } > if (allowAboveRoot) { > if (res.length > 0) JUSTIFICATION: This assignment preserves INVARIANT(D) because `separator` is either "/" or "\\" by PRECONDITION(A), and so the new value of `res`, which ends with ".", does not end with `separator`. JUSTIFICATION: This assignment preserves INVARIANT(E). We know by INVARIANT(D) that `res` does not end with a separator. Therefore, appending a `separator` does not introduce two consecutive `separator`s, and appending two copies of "." does not introduce two consecutive separators because, by PRECONDITION(A), `separator` is either "/" or "\\" and so does not contain ".". > res += `${separator}..`; > else JUSTIFICATION: This assignment preserves INVARIANT(D) and INVARIANT(E) because `separator` is either "/" or "\\" by PRECONDITION(A), and so does not appear in the new value for `res` at all (either at the end or twice consecutively). > res = '..'; > lastSegmentLength = 2; > } > } else { > if (res.length > 0) JUSTIFICATION: This assignment preserves INVARIANT(D) and INVARIANT(E): - By INVARIANT(J), `lastSlash` was less than `i` at the top of the loop body. By control flow, neither `lastSlash` nor `i` has since been reassigned, so it still holds that `lastSlash < i`. - At this point in the loop body, we have not assigned to `lastSlash`. - By control flow, we also have `lastSlash !== i - 1`. - By INVARIANT(F), both `lastSlash` and `i` are integers. - Therefore, `lastSlash < i - 1`, so `lastSlash + 1 < i`. - By the loop guard, `i <= path.length`. - The previous two facts imply that `lastSlash + 1 < path.length`. - Therefore, `path.slice(lastSlash + 1, i)` is nonempty. - By INVARIANT(H), this slice does not contain a `separator`. - Because the slice is nonempty, the new value of `res` will end in the last character of the slice, which is not a `separator`, so INVARIANT(D) is preserved. - Because `res` does not end with a separator, appending a separator to `res` does not introduce two consecutive separators. Because the slice does not contain a separator, subsequently appending the slice also does not introduce two consecutive separators, so INVARIANT(E) is preserved. > res += separator + path.slice(lastSlash + 1, i); > else JUSTIFICATION: This assignment preserves INVARIANT(D) and INVARIANT(E), because we know from INVARIANT(H) that the slice does not contain a separator at all, so the new value of `res` neither ends in a separator nor contains two consecutive separators. > res = path.slice(lastSlash + 1, i); > lastSegmentLength = i - lastSlash - 1; > } JUSTIFICATION: This assignment preserves INVARIANT(H), by LEMMA(I). > lastSlash = i; > dots = 0; JUSTIFICATION: This loop boundary preserves INVARIANT(H), by LEMMA(I). > } else if (code === CHAR_DOT && dots !== -1) { > ++dots; JUSTIFICATION: This loop boundary preserves INVARIANT(H). We know by induction that `path.slice(lastSlash + 1, i)` does not contain a separator. We know from control flow that `code` is `CHAR_DOT`, so `path[i]` is not a separator. Thus, `path.slice(lastSlash + 1, i + 1)` does not contain a separator, so INVARIANT(H) holds. > } else { > dots = -1; JUSTIFICATION: This loop boundary preserves INVARIANT(H). We know by induction that `path.slice(lastSlash + 1, i)` does not contain a separator. We know from control flow that `!isPathSeparator(code)`. We also know from control flow that `code` is either `path.charCodeAt(i)` or `CHAR_FORWARD_SLASH`. PRECONDITION(C) shows that `code` cannot be `CHAR_FORWARD_SLASH`, because `!isPathSeparator(code)`, so `code` must be `path.charCodeAt(i)`. PRECONDITION(B) shows that `code` cannot be `separator.charCodeAt(0)`. This implies that `path[i]` is not `separator`. Thus, `path.slice(lastSlash + 1, i + 1)` does not contain a separator, so INVARIANT(H) holds. > } > } > return res; > } ``` Test Plan: Existing unit tests pass. No additional tests are required. wchargin-branch: normalizeString-dead-branch --- lib/path.js | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/path.js b/lib/path.js index 90129f1f52989e..4bb385586166c4 100644 --- a/lib/path.js +++ b/lib/path.js @@ -21,7 +21,10 @@ 'use strict'; -const { ERR_INVALID_ARG_TYPE } = require('internal/errors').codes; +const { + ERR_ASSERTION, + ERR_INVALID_ARG_TYPE +} = require('internal/errors').codes; const { CHAR_UPPERCASE_A, CHAR_LOWERCASE_A, @@ -88,6 +91,11 @@ function normalizeString(path, allowAboveRoot, separator, isPathSeparator) { lastSlash = i; dots = 0; continue; + } else { + throw new ERR_ASSERTION( + 'invariant violation: unreachable: ' + + JSON.stringify({ path, allowAboveRoot, separator }) + ); } } else if (res.length === 2 || res.length === 1) { res = ''; From 013a5270966bcb58b7719486a15f2971fa3652a8 Mon Sep 17 00:00:00 2001 From: William Chargin Date: Mon, 13 Aug 2018 11:29:29 -0700 Subject: [PATCH 2/2] fixup: remove branch entirely instead of asserting See review comments on #22273 for rationale. wchargin-branch: normalizeString-dead-branch --- lib/path.js | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/lib/path.js b/lib/path.js index 4bb385586166c4..c8d92f14af04de 100644 --- a/lib/path.js +++ b/lib/path.js @@ -21,10 +21,7 @@ 'use strict'; -const { - ERR_ASSERTION, - ERR_INVALID_ARG_TYPE -} = require('internal/errors').codes; +const { ERR_INVALID_ARG_TYPE } = require('internal/errors').codes; const { CHAR_UPPERCASE_A, CHAR_LOWERCASE_A, @@ -80,23 +77,16 @@ function normalizeString(path, allowAboveRoot, separator, isPathSeparator) { res.charCodeAt(res.length - 2) !== CHAR_DOT) { if (res.length > 2) { const lastSlashIndex = res.lastIndexOf(separator); - if (lastSlashIndex !== res.length - 1) { - if (lastSlashIndex === -1) { - res = ''; - lastSegmentLength = 0; - } else { - res = res.slice(0, lastSlashIndex); - lastSegmentLength = res.length - 1 - res.lastIndexOf(separator); - } - lastSlash = i; - dots = 0; - continue; + if (lastSlashIndex === -1) { + res = ''; + lastSegmentLength = 0; } else { - throw new ERR_ASSERTION( - 'invariant violation: unreachable: ' + - JSON.stringify({ path, allowAboveRoot, separator }) - ); + res = res.slice(0, lastSlashIndex); + lastSegmentLength = res.length - 1 - res.lastIndexOf(separator); } + lastSlash = i; + dots = 0; + continue; } else if (res.length === 2 || res.length === 1) { res = ''; lastSegmentLength = 0;