From 97158efcd1576824ee305c4d6e5a2d93a6f6b24f Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Wed, 14 Feb 2024 17:48:08 +0000 Subject: [PATCH 01/11] Sort out behaviour of newlineIsToken and ignoreWhitespace --- README.md | 10 +--------- src/diff/line.js | 29 ++++++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 1496e00f..132c730a 100644 --- a/README.md +++ b/README.md @@ -48,21 +48,13 @@ Broadly, jsdiff's diff functions all take an old text and a new text and perform * `Diff.diffLines(oldStr, newStr[, options])` - diffs two blocks of text, treating each line as a token. Options - * `ignoreWhitespace`: `true` to strip all leading and trailing whitespace characters from each line before performing the diff. Defaults to `false`. + * `ignoreWhitespace`: `true` to ignore leading and trailing whitespace characters when checking if two lines are equal. Defaults to `false`. * `stripTrailingCr`: `true` to remove all trailing CR (`\r`) characters before performing the diff. Defaults to `false`. This helps to get a useful diff when diffing UNIX text files against Windows text files. * `newlineIsToken`: `true` to treat the newline character at the end of each line as its own token. This allows for changes to the newline structure to occur independently of the line content and to be treated as such. In general this is the more human friendly form of `diffLines`; the default behavior with this option turned off is better suited for patches and other computer friendly output. Defaults to `false`. Returns a list of [change objects](#change-objects). -* `Diff.diffTrimmedLines(oldStr, newStr[, options])` - diffs two blocks of text, comparing line by line, after stripping leading and trailing whitespace. Equivalent to calling `diffLines` with `ignoreWhitespace: true`. - - Options - * `stripTrailingCr`: Same as in `diffLines`. Defaults to `false`. - * `newlineIsToken`: Same as in `diffLines`. Defaults to `false`. - - Returns a list of [change objects](#change-objects). - * `Diff.diffSentences(oldStr, newStr[, options])` - diffs two blocks of text, treating each sentence as a token. Returns a list of [change objects](#change-objects). diff --git a/src/diff/line.js b/src/diff/line.js index 4ab01b2b..a44c9ab2 100644 --- a/src/diff/line.js +++ b/src/diff/line.js @@ -23,9 +23,6 @@ lineDiff.tokenize = function(value) { if (i % 2 && !this.options.newlineIsToken) { retLines[retLines.length - 1] += line; } else { - if (this.options.ignoreWhitespace) { - line = line.trim(); - } retLines.push(line); } } @@ -33,7 +30,33 @@ lineDiff.tokenize = function(value) { return retLines; }; +lineDiff.equals = function(left, right) { + // If we're ignoring whitespace, we need to normalise lines by stripping + // whitespace before checking equality. (This has an annoying interaction + // with newlineIsToken that requires special handling: if newlines get their + // own token, then we DON'T want to trim the *newlines* tokens down to empty + // strings, since this would cause us to treat whitespace-only line content + // as equal to a separator between lines, which would be weird and + // inconsistent with the documented behavior of the options.) + if (this.options.ignoreWhitespace) { + if (!this.options.newlineIsToken || !left.includes("\n")) { + left = left.trim(); + } + if (!this.options.newlineIsToken || !right.includes("\n")) { + right = right.trim(); + } + } + return Diff.equals(left, right); +} + export function diffLines(oldStr, newStr, callback) { return lineDiff.diff(oldStr, newStr, callback); } + +// Kept for backwards compatibility. This is a rather arbitrary wrapper method +// that just calls `diffLines` with `ignoreWhitespace: true`. It's confusing to +// have two ways to do exactly the same thing in the API, so we no longer +// document this one (library users should explicitly use `diffLines` with +// `ignoreWhitespace: true` instead) but we keep it around to maintain +// compatibility with code that used old versions. export function diffTrimmedLines(oldStr, newStr, callback) { let options = generateOptions(callback, {ignoreWhitespace: true}); return lineDiff.diff(oldStr, newStr, options); From 8f8c31ba165bcb34c3d7ed338dd7885d11bbf928 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Thu, 15 Feb 2024 12:29:48 +0000 Subject: [PATCH 02/11] Fix linting errors I just introduced --- src/diff/line.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/diff/line.js b/src/diff/line.js index a44c9ab2..f3df3e88 100644 --- a/src/diff/line.js +++ b/src/diff/line.js @@ -39,15 +39,15 @@ lineDiff.equals = function(left, right) { // as equal to a separator between lines, which would be weird and // inconsistent with the documented behavior of the options.) if (this.options.ignoreWhitespace) { - if (!this.options.newlineIsToken || !left.includes("\n")) { + if (!this.options.newlineIsToken || !left.includes('\n')) { left = left.trim(); } - if (!this.options.newlineIsToken || !right.includes("\n")) { + if (!this.options.newlineIsToken || !right.includes('\n')) { right = right.trim(); } } return Diff.equals(left, right); -} +}; export function diffLines(oldStr, newStr, callback) { return lineDiff.diff(oldStr, newStr, callback); } From c9c01fcaf088d4b5653cf915ebbc88be90aba2f3 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Thu, 15 Feb 2024 12:32:50 +0000 Subject: [PATCH 03/11] Fix broken super call --- src/diff/line.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diff/line.js b/src/diff/line.js index f3df3e88..f482f0dd 100644 --- a/src/diff/line.js +++ b/src/diff/line.js @@ -46,7 +46,7 @@ lineDiff.equals = function(left, right) { right = right.trim(); } } - return Diff.equals(left, right); + return Diff.prototype.equals.call(this, left, right); }; export function diffLines(oldStr, newStr, callback) { return lineDiff.diff(oldStr, newStr, callback); } From 84de1c0984e4742db4f4c35bbffabb2a3cafd2bc Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Thu, 15 Feb 2024 12:39:53 +0000 Subject: [PATCH 04/11] Update failing test to reflect changes --- test/diff/line.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/diff/line.js b/test/diff/line.js index abd0ede9..44633368 100644 --- a/test/diff/line.js +++ b/test/diff/line.js @@ -146,7 +146,7 @@ describe('diff/line', function() { const diffResult = diffTrimmedLines( 'line\r\nold value \r\nline', 'line\r\nnew value\r\nline'); - expect(convertChangesToXML(diffResult)).to.equal('line\r\nold value\r\nnew value\r\nline'); + expect(convertChangesToXML(diffResult)).to.equal('line\r\nold value \r\nnew value\r\nline'); }); }); From 0d03f2d00c131798aa110ed44dece7c270dd7f7f Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Thu, 15 Feb 2024 12:40:14 +0000 Subject: [PATCH 05/11] Update passing test to more completely test the new behaviour --- test/diff/line.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/diff/line.js b/test/diff/line.js index 44633368..45dfeff1 100644 --- a/test/diff/line.js +++ b/test/diff/line.js @@ -138,8 +138,8 @@ describe('diff/line', function() { it('should ignore leading and trailing whitespace', function() { const diffResult = diffTrimmedLines( 'line\nvalue \nline', - 'line\nvalue\nline'); - expect(convertChangesToXML(diffResult)).to.equal('line\nvalue\nline'); + 'line \nvalue\nline'); + expect(convertChangesToXML(diffResult)).to.equal('line \nvalue\nline'); }); it('should handle windows line endings', function() { From 7ce105d345ac51dd563f26a0dad36056d7f0ddba Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Thu, 15 Feb 2024 12:41:04 +0000 Subject: [PATCH 06/11] Fix a gibberish test name --- test/diff/line.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/diff/line.js b/test/diff/line.js index 45dfeff1..a02d8ca6 100644 --- a/test/diff/line.js +++ b/test/diff/line.js @@ -12,7 +12,7 @@ describe('diff/line', function() { 'line\nnew value\nline'); expect(convertChangesToXML(diffResult)).to.equal('line\nold value\nnew value\nline'); }); - it('should the same lines in diff', function() { + it('should treat identical lines as equal', function() { const diffResult = diffLines( 'line\nvalue\nline', 'line\nvalue\nline'); @@ -128,7 +128,7 @@ describe('diff/line', function() { 'line\nnew value\nline'); expect(convertChangesToXML(diffResult)).to.equal('line\nold value\nnew value\nline'); }); - it('should the same lines in diff', function() { + it('should treat identical lines as equal', function() { const diffResult = diffTrimmedLines( 'line\nvalue\nline', 'line\nvalue\nline'); From 44cb0c574d695655832a1d1c8ed2b2de222c751d Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Thu, 15 Feb 2024 13:47:56 +0000 Subject: [PATCH 07/11] Add test of newlineIsToken/ignoreWhitespace compatibility --- test/diff/line.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/diff/line.js b/test/diff/line.js index a02d8ca6..3c32480b 100644 --- a/test/diff/line.js +++ b/test/diff/line.js @@ -148,6 +148,15 @@ describe('diff/line', function() { 'line\r\nnew value\r\nline'); expect(convertChangesToXML(diffResult)).to.equal('line\r\nold value \r\nnew value\r\nline'); }); + + it('should be compatible with newlineIsToken', function() { + const diffResult = diffTrimmedLines( + 'line1\nline2\n \nline4\n \n', + 'line1\nline2\n\n\nline4\n \n', + {newlineIsToken: true} + ); + expect(convertChangesToXML(diffResult)).to.equal('line1\nline2\n \n\nline4\n \n'); + }); }); describe('#diffLinesNL', function() { From 3aba42fd135b436eef440b71affdf34221feb506 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Thu, 15 Feb 2024 14:07:24 +0000 Subject: [PATCH 08/11] Add tests that demonstrate a bug in the ignoreWhitespace/newlineIsToken compat --- test/diff/line.js | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/diff/line.js b/test/diff/line.js index 3c32480b..64099624 100644 --- a/test/diff/line.js +++ b/test/diff/line.js @@ -142,6 +142,11 @@ describe('diff/line', function() { expect(convertChangesToXML(diffResult)).to.equal('line \nvalue\nline'); }); + it('should not consider adding whitespace to an empty line an insertion', function() { + const diffResult = diffTrimmedLines('foo\n\nbar', 'foo\n \nbar'); + expect(convertChangesToXML(diffResult)).to.equal('foo\n \nbar'); + }); + it('should handle windows line endings', function() { const diffResult = diffTrimmedLines( 'line\r\nold value \r\nline', @@ -157,6 +162,15 @@ describe('diff/line', function() { ); expect(convertChangesToXML(diffResult)).to.equal('line1\nline2\n \n\nline4\n \n'); }); + + it( + 'should not consider adding whitespace to an empty line an insertion ' + + 'even in newlineIsToken mode where a token may be an empty string', + function() { + const diffResult = diffTrimmedLines('foo\n\nbar', 'foo\n \nbar', {newlineIsToken: true}); + expect(convertChangesToXML(diffResult)).to.equal('foo\n \nbar'); + } + ); }); describe('#diffLinesNL', function() { From bc4302d698e03233289e5945b38041e162502dbb Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Thu, 15 Feb 2024 14:44:44 +0000 Subject: [PATCH 09/11] Document quirk of using the two options together --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 132c730a..e360548b 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,8 @@ Broadly, jsdiff's diff functions all take an old text and a new text and perform This helps to get a useful diff when diffing UNIX text files against Windows text files. * `newlineIsToken`: `true` to treat the newline character at the end of each line as its own token. This allows for changes to the newline structure to occur independently of the line content and to be treated as such. In general this is the more human friendly form of `diffLines`; the default behavior with this option turned off is better suited for patches and other computer friendly output. Defaults to `false`. + Note that while using `ignoreWhitespace` in combination with `newlineIsToken` is not an error, results may not be as expected. With `ignoreWhitespace: true` and `newlineIsToken: false`, changing a completely empty line to contain some spaces is treated as a non-change, but with `ignoreWhitespace: true` and `newlineIsToken: true`, it is treated as an insertion. This is because the content of a completely blank line is not a token at all in `newlineIsToken` mode. + Returns a list of [change objects](#change-objects). * `Diff.diffSentences(oldStr, newStr[, options])` - diffs two blocks of text, treating each sentence as a token. From eda3f59b75b0a2d5096030055bd1cdc0369d7011 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Thu, 15 Feb 2024 14:50:42 +0000 Subject: [PATCH 10/11] Add release notes --- release-notes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/release-notes.md b/release-notes.md index 2450d124..14a25faf 100644 --- a/release-notes.md +++ b/release-notes.md @@ -11,6 +11,7 @@ - [#460](https://github.com/kpdecker/jsdiff/pull/460) Added `oneChangePerToken` option. - [#467](https://github.com/kpdecker/jsdiff/pull/467) When passing a `comparator(left, right)` to `diffArrays`, values from the old array will now consistently be passed as the first argument (`left`) and values from the new array as the second argument (`right`). Previously this was almost (but not quite) always the other way round. - [#480](https://github.com/kpdecker/jsdiff/pull/480) Passing `maxEditLength` to `createPatch` & `createTwoFilesPatch` now works properly (i.e. returns undefined if the max edit distance is exceeded; previous behavior was to crash with a `TypeError` if the edit distance was exceeded). +- [#486](https://github.com/kpdecker/jsdiff/pull/486) The `ignoreWhitespace` option of `diffLines` behaves more sensibly now. `value`s in returned change objects now include leading/trailing whitespace even when `ignoreWhitespace` is used, just like how with `ignoreCase` the `value`s still reflect the case of one of the original texts instead of being all-lowercase. `ignoreWhitespace` is also now compatible with `newlineIsToken`. Finally, `diffTrimmedLines` is deprecated (and removed from the docs) in favour of using `diffLines` with `ignoreWhitespace: true`; the two are, and always have been, equivalent. ## v5.2.0 From 51acd4cc5b7b77343c97b6a3d3460fa456557a7b Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Thu, 15 Feb 2024 15:09:11 +0000 Subject: [PATCH 11/11] Fix comment typo --- src/diff/line.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diff/line.js b/src/diff/line.js index f482f0dd..0d56ef13 100644 --- a/src/diff/line.js +++ b/src/diff/line.js @@ -34,7 +34,7 @@ lineDiff.equals = function(left, right) { // If we're ignoring whitespace, we need to normalise lines by stripping // whitespace before checking equality. (This has an annoying interaction // with newlineIsToken that requires special handling: if newlines get their - // own token, then we DON'T want to trim the *newlines* tokens down to empty + // own token, then we DON'T want to trim the *newline* tokens down to empty // strings, since this would cause us to treat whitespace-only line content // as equal to a separator between lines, which would be weird and // inconsistent with the documented behavior of the options.)