Skip to content
This repository has been archived by the owner on Jun 26, 2020. It is now read-only.

Commit

Permalink
Merge pull request #327 from ckeditor/i/3147
Browse files Browse the repository at this point in the history
Fix: Fixed various cases with typing multi-byte unicode sequences (e.g. emojis). Closes ckeditor/ckeditor5#3147. Closes ckeditor/ckeditor5#6495.
  • Loading branch information
Reinmar authored Mar 27, 2020
2 parents 7cd1f48 + 3af9d5e commit 6dc1ba6
Show file tree
Hide file tree
Showing 3 changed files with 246 additions and 14 deletions.
11 changes: 8 additions & 3 deletions src/fastdiff.js
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,18 @@ export default function fastDiff( a, b, cmp, atomicChanges = false ) {
return a === b;
};

// Transform text or any iterable into arrays for easier, consistent processing.
// Convert the string (or any array-like object - eg. NodeList) to an array by using the slice() method because,
// unlike Array.from(), it returns array of UTF-16 code units instead of the code points of a string.
// One code point might be a surrogate pair of two code units. All text offsets are expected to be in code units.
// See ckeditor/ckeditor5#3147.
//
// We need to make sure here that fastDiff() works identical to diff().
if ( !Array.isArray( a ) ) {
a = Array.from( a );
a = Array.prototype.slice.call( a );
}

if ( !Array.isArray( b ) ) {
b = Array.from( b );
b = Array.prototype.slice.call( b );
}

// Find first and last change.
Expand Down
106 changes: 99 additions & 7 deletions tests/diff.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,33 @@ describe( 'diff', () => {
} );

it( 'should diff strings', () => {
expect( diff( 'aba', 'acca' ) ).to.deep.equals( [ 'equal', 'insert', 'insert', 'delete', 'equal' ] );
expect( diff( 'aba', 'acca' ) ).to.deep.equal( [ 'equal', 'insert', 'insert', 'delete', 'equal' ] );
testUtils.sinon.assert.notCalled( fastDiffSpy );
} );

it( 'should diff arrays', () => {
expect( diff( Array.from( 'aba' ), Array.from( 'acca' ) ) ).to.deep.equals( [ 'equal', 'insert', 'insert', 'delete', 'equal' ] );
expect( diff( Array.from( 'aba' ), Array.from( 'acca' ) ) ).to.deep.equal( [ 'equal', 'insert', 'insert', 'delete', 'equal' ] );
testUtils.sinon.assert.notCalled( fastDiffSpy );
} );

it( 'should reverse result if the second string is shorter', () => {
expect( diff( 'acca', 'aba' ) ).to.deep.equals( [ 'equal', 'delete', 'delete', 'insert', 'equal' ] );
expect( diff( 'acca', 'aba' ) ).to.deep.equal( [ 'equal', 'delete', 'delete', 'insert', 'equal' ] );
testUtils.sinon.assert.notCalled( fastDiffSpy );
} );

it( 'should diff if strings are same', () => {
expect( diff( 'abc', 'abc' ) ).to.deep.equals( [ 'equal', 'equal', 'equal' ] );
expect( diff( 'abc', 'abc' ) ).to.deep.equal( [ 'equal', 'equal', 'equal' ] );
testUtils.sinon.assert.notCalled( fastDiffSpy );
} );

it( 'should diff if one string is empty', () => {
expect( diff( '', 'abc' ) ).to.deep.equals( [ 'insert', 'insert', 'insert' ] );
expect( diff( '', 'abc' ) ).to.deep.equal( [ 'insert', 'insert', 'insert' ] );
testUtils.sinon.assert.notCalled( fastDiffSpy );
} );

it( 'should use custom comparator', () => {
expect( diff( 'aBc', 'abc' ) ).to.deep.equals( [ 'equal', 'insert', 'delete', 'equal' ] );
expect( diff( 'aBc', 'abc', ( a, b ) => a.toLowerCase() == b.toLowerCase() ) ).to.deep.equals( [ 'equal', 'equal', 'equal' ] );
expect( diff( 'aBc', 'abc' ) ).to.deep.equal( [ 'equal', 'insert', 'delete', 'equal' ] );
expect( diff( 'aBc', 'abc', ( a, b ) => a.toLowerCase() == b.toLowerCase() ) ).to.deep.equal( [ 'equal', 'equal', 'equal' ] );
testUtils.sinon.assert.notCalled( fastDiffSpy );
} );

Expand All @@ -67,4 +67,96 @@ describe( 'diff', () => {
diff( getLongText( 10 ), getLongText( 1990 ) );
testUtils.sinon.assert.called( fastDiffSpy );
} );

describe( 'with multi-byte unicode', () => {
describe( 'simple emoji - single unicode code point', () => {
// 🙂 = '\ud83d\ude42' = 2 chars
const emojiLength = '🙂'.length;
const emojiDiffInsert = new Array( emojiLength ).fill( 'insert' );
const emojiDiffEqual = new Array( emojiLength ).fill( 'equal' );
const emojiDiffDelete = new Array( emojiLength ).fill( 'delete' );

it( 'should properly handle emoji insertion', () => {
expect( diff( 'abc', 'ab🙂c' ) ).to.deep.equal( [ 'equal', 'equal', ...emojiDiffInsert, 'equal' ] );
} );

it( 'should properly handle emoji insertion on the end', () => {
expect( diff( 'abc', 'abc🙂' ) ).to.deep.equal( [ 'equal', 'equal', 'equal', ...emojiDiffInsert ] );
} );

it( 'should properly handle appending to string containing emoji', () => {
expect( diff( 'abc🙂', 'abc🙂d' ) ).to.deep.equal( [ 'equal', 'equal', 'equal', ...emojiDiffEqual, 'insert' ] );
} );

it( 'should properly handle insertion to string containing emoji', () => {
expect( diff( 'ab🙂cd', 'ab🙂cde' ) ).to.deep.equal( [ 'equal', 'equal', ...emojiDiffEqual, 'equal', 'equal', 'insert' ] );
} );

it( 'should properly remove emoji', () => {
expect( diff( 'a🙂b', 'ab' ) ).to.deep.equal( [ 'equal', ...emojiDiffDelete, 'equal' ] );
} );

it( 'should properly replace emoji', () => {
expect( diff( 'a🙂b', 'axb' ) ).to.deep.equal( [ 'equal', ...emojiDiffDelete, 'insert', 'equal' ] );
} );

it( 'should properly replace one emoji with another', () => {
// 😄 = '\ud83d\ude04' = 2 chars
// Note both emoji have same first code unit
expect( diff( 'a🙂b', 'a😄b' ) ).to.deep.equal(
[ 'equal', 'equal', ...emojiDiffInsert.slice( 1 ), ...emojiDiffDelete.slice( 1 ), 'equal' ]
);
} );
} );

describe( 'combined emoji - unicode ZWJ sequence', () => {
// 👩‍🦰 = '\ud83d\udc69\u200d\ud83e\uddB0' = 5 chars
const emojiLength = '👩‍🦰'.length;
const emojiDiffInsert = new Array( emojiLength ).fill( 'insert' );
const emojiDiffEqual = new Array( emojiLength ).fill( 'equal' );
const emojiDiffDelete = new Array( emojiLength ).fill( 'delete' );

it( 'should properly handle emoji insertion (with ZWJ)', () => {
expect( diff( 'abc', 'ab👩‍🦰c' ) ).to.deep.equal( [ 'equal', 'equal', ...emojiDiffInsert, 'equal' ] );
} );

it( 'should properly handle emoji insertion on the end (with ZWJ)', () => {
expect( diff( 'abc', 'abc👩‍🦰' ) ).to.deep.equal( [ 'equal', 'equal', 'equal', ...emojiDiffInsert ] );
} );

it( 'should properly handle appending to string containing emoji (with ZWJ)', () => {
expect( diff( 'ab👩‍🦰', 'ab👩‍🦰c' ) ).to.deep.equal( [ 'equal', 'equal', ...emojiDiffEqual, 'insert' ] );
} );

it( 'should properly handle insertion to string containing emoji (with ZWJ)', () => {
expect( diff( 'a👩‍🦰b', 'a👩‍🦰bc' ) ).to.deep.equal( [ 'equal', ...emojiDiffEqual, 'equal', 'insert' ] );
} );

it( 'should properly remove emoji (with ZWJ)', () => {
expect( diff( 'a👩‍🦰b', 'ab' ) ).to.deep.equal( [ 'equal', ...emojiDiffDelete, 'equal' ] );
} );

it( 'should properly replace emoji (with ZWJ)', () => {
expect( diff( 'a👩‍🦰b', 'axb' ) ).to.deep.equal( [ 'equal', ...emojiDiffDelete, 'insert', 'equal' ] );
} );

it( 'should properly replace ZWJ sequence with simple emoji', () => {
const simpleEmojiDiffInsert = new Array( '🙂'.length ).fill( 'insert' );

// Note that first char of both emoji is the same.
expect( diff( 'a👩‍🦰b', 'a🙂b' ) ).to.deep.equal( [
'equal', 'equal', ...emojiDiffDelete.slice( 1 ), ...simpleEmojiDiffInsert.slice( 1 ), 'equal'
] );
} );

it( 'should properly replace simple emoji with ZWJ sequence', () => {
const simpleEmojiDiffDelete = new Array( '🙂'.length ).fill( 'delete' );

// Note that first char of both emoji is the same.
expect( diff( 'a🙂b', 'a👩‍🦰b' ) ).to.deep.equal( [
'equal', 'equal', ...emojiDiffInsert.slice( 1 ), ...simpleEmojiDiffDelete.slice( 1 ), 'equal'
] );
} );
} );
} );
} );
143 changes: 139 additions & 4 deletions tests/fastdiff.js
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,48 @@ describe( 'fastDiff', () => {
{ index: 2, type: 'insert', values: [ { text: 'baz' } ] }
], true, ( a, b ) => a.text === b.text );
} );

describe( 'with multi-byte unicode', () => {
describe( 'simple emoji - single unicode code point', () => {
// 🙂 = '\ud83d\ude42' = 2 chars

it( 'should properly handle emoji insertion', () => {
expectDiff( 'abc', 'ab🙂c', [ { index: 2, type: 'insert', values: '🙂'.split( '' ) } ] );
} );

it( 'should properly handle emoji insertion on the end', () => {
expectDiff( 'abc', 'abc🙂', [ { index: 3, type: 'insert', values: '🙂'.split( '' ) } ] );
} );

it( 'should properly handle appending to string containing emoji', () => {
expectDiff( 'abc🙂', 'abc🙂d', [ { index: 5, type: 'insert', values: [ 'd' ] } ] );
} );

it( 'should properly handle insertion to string containing emoji', () => {
expectDiff( 'ab🙂cd', 'ab🙂cde', [ { index: 6, type: 'insert', values: [ 'e' ] } ] );
} );
} );

describe( 'combined emoji - unicode ZWJ sequence', () => {
// 👩‍🦰 = '\ud83d\udc69\u200d\ud83e\uddB0' = 5 chars

it( 'should properly handle emoji with ZWJ insertion', () => {
expectDiff( 'abc', 'ab👩‍🦰c', [ { index: 2, type: 'insert', values: '👩‍🦰'.split( '' ) } ] );
} );

it( 'should properly handle emoji (with ZWJ) insertion on the end', () => {
expectDiff( 'abc', 'abc👩‍🦰', [ { index: 3, type: 'insert', values: '👩‍🦰'.split( '' ) } ] );
} );

it( 'should properly handle appending to string containing emoji (with ZWJ)', () => {
expectDiff( 'ab👩‍🦰', 'ab👩‍🦰c', [ { index: 7, type: 'insert', values: [ 'c' ] } ] );
} );

it( 'should properly handle insertion to string containing emoji (with ZWJ)', () => {
expectDiff( 'a👩‍🦰b', 'a👩‍🦰bc', [ { index: 7, type: 'insert', values: [ 'c' ] } ] );
} );
} );
} );
} );

describe( 'deletion', () => {
Expand Down Expand Up @@ -196,6 +238,42 @@ describe( 'fastDiff', () => {
{ index: 0, type: 'delete', howMany: 1 }
], true, ( a, b ) => a.text === b.text );
} );

describe( 'with multi-byte unicode', () => {
describe( 'simple emoji - single unicode code point', () => {
// 🙂 = '\ud83d\ude42' = 2 chars
const emojiLength = '🙂'.split( '' ).length;

it( 'should properly handle emoji delete', () => {
expectDiff( 'ab🙂c', 'abc', [ { index: 2, type: 'delete', howMany: emojiLength } ] );
} );

it( 'should properly handle emoji delete at end', () => {
expectDiff( 'ab🙂', 'ab', [ { index: 2, type: 'delete', howMany: emojiLength } ] );
} );

it( 'should properly handle emoji delete at beginning', () => {
expectDiff( '🙂ab', 'ab', [ { index: 0, type: 'delete', howMany: emojiLength } ] );
} );
} );

describe( 'combined emoji - unicode ZWJ sequence', () => {
// 👩‍🦰 = '\ud83d\udc69\u200d\ud83e\uddB0' = 5 chars
const emojiLength = '👩‍🦰'.split( '' ).length;

it( 'should properly handle emoji delete (with ZWJ)', () => {
expectDiff( 'ab👩‍🦰c', 'abc', [ { index: 2, type: 'delete', howMany: emojiLength } ] );
} );

it( 'should properly handle emoji delete at end (with ZWJ)', () => {
expectDiff( 'ab👩‍🦰', 'ab', [ { index: 2, type: 'delete', howMany: emojiLength } ] );
} );

it( 'should properly handle emoji delete at beginning (with ZWJ)', () => {
expectDiff( '👩‍🦰ab', 'ab', [ { index: 0, type: 'delete', howMany: emojiLength } ] );
} );
} );
} );
} );

describe( 'replacement', () => {
Expand Down Expand Up @@ -293,6 +371,63 @@ describe( 'fastDiff', () => {
{ index: 2, type: 'delete', howMany: 1 }
], true, ( a, b ) => a.text === b.text );
} );

describe( 'with multi-byte unicode', () => {
// 🙂 = '\ud83d\ude42' = 2 chars
const smileEmoji = '🙂'.split( '' );

// 👩 = '\ud83d\udc69' = 2 chars
const womanEmoji = '👩'.split( '' );

// 👩‍🦰 = '\ud83d\udc69\u200d\ud83e\uddB0' = 5 chars
const womanRedHairEmoji = '👩‍🦰'.split( '' );

// Do not check compatibility with 'diffToChanges' as it generates:
// [ { index: 1, type: 'delete', howMany: 2 }, { index: 1, type: 'insert', values: [ 'x' ] } ]
it( 'should properly replace emoji with text', () => {
expectDiff( 'a🙂b', 'axb', [
{ index: 1, type: 'insert', values: [ 'x' ] },
{ index: 2, type: 'delete', howMany: smileEmoji.length }
], false );
} );

it( 'should properly replace text with emoji', () => {
expectDiff( 'abc', 'a👩c', [
{ index: 1, type: 'insert', values: womanEmoji },
{ index: 3, type: 'delete', howMany: 1 }
] );
} );

it( 'should properly replace emoji with emoji', () => {
// Note that first char of both emoji is the same.
expectDiff( 'a👩b', 'a🙂b', [
{ index: 2, type: 'insert', values: smileEmoji.slice( 1 ) },
{ index: 3, type: 'delete', howMany: 1 }
] );
} );

it( 'should properly replace simple emoji with ZWJ sequence of it', () => {
// Note that first 2 chars of both emoji are the same.
expectDiff( 'a👩b', 'a👩‍🦰b', [
{ index: 3, type: 'insert', values: womanRedHairEmoji.slice( 2 ) }
] );
} );

it( 'should properly replace ZWJ sequence with simple emoji (part of sequence)', () => {
// Note that first 2 chars of both emoji are the same.
expectDiff( 'a👩‍🦰b', 'a👩b', [
{ index: 3, type: 'delete', howMany: 3 }
] );
} );

it( 'should properly replace simple emoji with other ZWJ sequence', () => {
// Note that first char of both emoji is the same.
expectDiff( 'a🙂b', 'a👩‍🦰b', [
{ index: 2, type: 'insert', values: womanRedHairEmoji.slice( 1 ) },
{ index: 6, type: 'delete', howMany: 1 }
] );
} );
} );
} );
} );

Expand Down Expand Up @@ -487,10 +622,10 @@ describe( 'fastDiff', () => {
function expectDiff( oldText, newText, expected, checkDiffToChangesCompatibility = true, comparator = null ) {
const result = fastDiff( oldText, newText, comparator );

expect( result ).to.deep.equals( expected, 'fastDiff changes failed' );
expect( result ).to.deep.equal( expected, 'fastDiff changes failed' );

if ( checkDiffToChangesCompatibility ) {
expect( result ).to.deep.equals(
expect( result ).to.deep.equal(
diffToChanges( diff( oldText, newText, comparator ), newText ), 'diffToChanges compatibility failed' );
}
}
Expand All @@ -500,9 +635,9 @@ function expectDiffLinear( oldText, newText, expected, checkDiffCompatibility =
const expectedArray = expected.split( '' ).map( item => actions[ item ] );
const result = fastDiff( oldText, newText, comparator, true );

expect( result ).to.deep.equals( expectedArray, 'fastDiff linear result failed' );
expect( result ).to.deep.equal( expectedArray, 'fastDiff linear result failed' );

if ( checkDiffCompatibility ) {
expect( result ).to.deep.equals( diff( oldText, newText, comparator ), 'diff compatibility failed' );
expect( result ).to.deep.equal( diff( oldText, newText, comparator ), 'diff compatibility failed' );
}
}

0 comments on commit 6dc1ba6

Please sign in to comment.