diff --git a/src/fastdiff.js b/src/fastdiff.js index a61ef5e..8d13179 100644 --- a/src/fastdiff.js +++ b/src/fastdiff.js @@ -100,8 +100,12 @@ export default function fastDiff( a, b, cmp, atomicChanges = false ) { return a === b; }; - // Transform text or any iterable into arrays for easier, consistent processing. - // Array.from was used here but it generated incorrect results for multi-byte unicode sequences. + // Convert the string (or any array-like object - e.g. NodeList) to an array by using the slice() method because, + // unlike Array.from(), it returns an array of UTF-16 code units instead of the code points of a string. + // One code point might be a surrogate pair of two code units. All text offsets are expected to be in code units. + // See ckeditor/ckeditor5#3147. + // + // We need to make sure here that fastDiff() works identically to diff(). if ( !Array.isArray( a ) ) { a = Array.prototype.slice.call( a ); } diff --git a/tests/diff.js b/tests/diff.js index d673db2..e026a38 100644 --- a/tests/diff.js +++ b/tests/diff.js @@ -99,6 +99,14 @@ describe( 'diff', () => { it( 'should properly replace emoji', () => { expect( diff( 'a🙂b', 'axb' ) ).to.deep.equal( [ 'equal', ...emojiDiffDelete, 'insert', 'equal' ] ); } ); + + it( 'should properly replace one emoji with another', () => { + // 😄 = '\ud83d\ude04' = 2 code units + // Note that both emoji have the same first code unit. + expect( diff( 'a🙂b', 'a😄b' ) ).to.deep.equal( + [ 'equal', 'equal', ...emojiDiffInsert.slice( 1 ), ...emojiDiffDelete.slice( 1 ), 'equal' ] + ); + } ); } ); describe( 'combined emoji - unicode ZWJ sequence', () => {