Skip to content
This repository has been archived by the owner on Jun 26, 2020. It is now read-only.

Introduced fastDiff() #238

Merged
merged 7 commits into from
May 9, 2018
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions src/fastdiff.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/**
* @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved.
* For licensing, see LICENSE.md.
*/

/**
* @module utils/fastdiff
*/

/**
* Finds position of the first and last change in the given strings and generates set of changes. Set of changes
* can be applied to the input text in order to transform it into the output text, for example:
*
* fastDiff( '12a', '12xyza' );
* // [ { index: 2, type: 'insert', values: [ 'x', 'y', 'z' ] } ]
*
* fastDiff( '12a', '12aa' );
* // [ { index: 3, type: 'insert', values: [ 'a' ] } ]
*
* fastDiff( '12xyza', '12a' );
* // [ { index: 2, type: 'delete', howMany: 3 } ]
*
* fastDiff( '12aa', '12a' );
* // [ { index: 3, type: 'delete', howMany: 1 } ]
*
* fastDiff( '12abc3', '2ab' );
* // [ { index: 0, type: 'insert', values: [ '2', 'a', 'b' ] }, { index: 3, type: 'delete', howMany: 6 } ]
*
* Using the returned results you can modify `oldText` to make it the same as `newText`:
*
* let input = '12abc3';
* const output = '2ab';
* const changes = fastDiff( input, output );
*
* changes.forEach( change => {
* if ( change.type == 'insert' ) {
* input = input.substring( 0, change.index ) + change.values.join( '' ) + input.substring( change.index );
* } else if ( change.type == 'delete' ) {
* input = input.substring( 0, change.index ) + input.substring( change.index + change.howMany );
* }
* } );
*
* input == output; // -> true
*
* The output format of this function is compatible with {@link module:utils/difftochanges~diffToChanges} output format.
*
* @param {String} oldText Input string.
* @param {String} newText Input string.
* @returns {Array} Array of changes.
*/
export default function fastDiff( oldText, newText ) {
	// Only texts that actually differ are analysed; the boundary search below assumes
	// that there is at least one difference to find.
	if ( oldText !== newText ) {
		// Locate the changed region first, then express it as `insert`/`delete` operations.
		return changeIndexesToChanges( newText, findChangeBoundaryIndexes( oldText, newText ) );
	}

	// Identical texts need no changes.
	return [];
}

// Finds position of the first and last change in the given strings. For example:
//
// const indexes = findChangeBoundaryIndexes( '1234', '13424' );
// console.log( indexes ); // { firstIndex: 1, lastIndexOld: 3, lastIndexNew: 4 }
//
// The above indexes mean that in `oldText` the modified part is `1[23]4` and in `newText` it is `1[342]4`.
// Based on such indexes, array with `insert`/`delete` operations which allows transforming
// old text to the new one could be generated.
//
Copy link
Contributor

@scofalik scofalik Apr 20, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it return correct values if strings are the same? If not, then I'd add a note that it is assumed that the strings are different.

// It is expected that `oldText` and `newText` are different.
//
// @param {String} oldText
// @param {String} newText
// @returns {Object}
// @returns {Number} return.firstIndex Index of the first change in both strings (always the same for both).
// @returns {Number} return.lastIndexOld Index in `oldText` at which the common ending of both texts starts
// (the first index after the changed part; equals `oldText.length` if the texts share no ending).
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it should be:

result.lastIndexOld Index of the last common character in oldText string.

or

result.lastIndexOld Index of the first common character in oldText string looking from back.

Because looking for the last common character starting from the back, means you are looking for the first common character basically.

// @returns {Number} return.lastIndexNew Index in `newText` at which the common ending of both texts starts
// (the first index after the changed part; equals `newText.length` if the texts share no ending).
function findChangeBoundaryIndexes( oldText, newText ) {
	// Index of the first change, which is also the length of the beginning shared by both texts.
	const firstIndex = findFirstDifferenceIndex( oldText, newText );

	// Drop the shared beginning and flip what is left, so that the very same "first difference" lookup
	// can be used to measure the ending shared by both texts.
	//
	// For example:
	//
	//		                    initial  ->  after cut  ->  reversed
	//		oldText:            '321ba'  ->  '21ba'     ->  'ab12'
	//		newText:            '31xba'  ->  '1xba'     ->  'abx1'
	//		sharedEndingLength:                             2
	//
	// So the last change occurred two characters before the end of the texts.
	const sharedEndingLength = findFirstDifferenceIndex(
		cutAndReverse( oldText, firstIndex ),
		cutAndReverse( newText, firstIndex )
	);

	// `sharedEndingLength` is counted from the end of the texts, so subtract it from each text's length
	// to get offsets counted from the beginning.
	return {
		firstIndex,
		lastIndexOld: oldText.length - sharedEndingLength,
		lastIndexNew: newText.length - sharedEndingLength
	};
}

// Returns the first index at which `oldText` and `newText` differ.
//
// If one text is a beginning of the other one, the length of the shorter text is returned (this is the first
// index at which only one of the texts has a character). If both texts are identical, their length is returned,
// so the result is always a number and callers never have to deal with `undefined`.
//
// @param {String} oldText
// @param {String} newText
// @returns {Number} Index of the first difference, or the texts' length when there is no difference.
function findFirstDifferenceIndex( oldText, newText ) {
	// Computed once up front instead of on every loop iteration.
	const maxLength = Math.max( oldText.length, newText.length );

	for ( let i = 0; i < maxLength; i++ ) {
		// Reading past the end of the shorter text yields `undefined`, which never equals a character,
		// so a length mismatch is reported as a difference too.
		if ( oldText[ i ] !== newText[ i ] ) {
			return i;
		}
	}

	// Reached only when both texts are identical (so `maxLength` is the length of either of them).
	return maxLength;
}

// Drops the first `cutHowMany` characters of `text` and returns the remainder written backwards.
//
// @param {String} text
// @param {Number} cutHowMany
// @returns {String}
function cutAndReverse( text, cutHowMany ) {
	const remainder = text.substring( cutHowMany );
	let reversed = '';

	// Walk the remainder from its last character to its first one, appending each character in turn.
	for ( let i = remainder.length - 1; i >= 0; i-- ) {
		reversed += remainder[ i ];
	}

	return reversed;
}

// Converts change indexes returned by `findChangeBoundaryIndexes` into an array of changes. The array holds
// no records (nothing changed), one record (an insertion or a deletion) or two records (an insertion followed
// by a deletion).
//
// @param {String} newText New text for which change indexes were calculated.
// @param {Object} changeIndexes Change indexes object from `findChangeBoundaryIndexes` function.
// @returns {Array.<Object>} Array of changes compatible with {@link module:utils/difftochanges~diffToChanges} format.
function changeIndexesToChanges( newText, changeIndexes ) {
	const { firstIndex, lastIndexOld, lastIndexNew } = changeIndexes;
	const changes = [];

	// Sizes of the changed region in the new and in the old text.
	const insertedCount = lastIndexNew - firstIndex;
	const deletedCount = lastIndexOld - firstIndex;

	// The insertion is emitted before the deletion to stay compatible with
	// {@link module:utils/difftochanges~diffToChanges} in most cases. However, 'diffToChanges' does not stick
	// to any order, so in some cases (for example replacing '12345' with 'abcd') it will generate
	// 'delete', 'insert' order.
	if ( insertedCount > 0 ) {
		changes.push( {
			index: firstIndex,
			type: 'insert',
			values: newText.substring( firstIndex, lastIndexNew ).split( '' )
		} );
	}

	if ( deletedCount > 0 ) {
		changes.push( {
			// The deletion starts right after whatever has been inserted.
			index: firstIndex + insertedCount,
			type: 'delete',
			howMany: deletedCount
		} );
	}

	return changes;
}
212 changes: 212 additions & 0 deletions tests/fastdiff.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
/**
* @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved.
* For licensing, see LICENSE.md.
*/

import fastDiff from '../src/fastdiff';
import diff from '../src/diff';
import diffToChanges from '../src/difftochanges';

describe( 'fastDiff', () => {
it( 'should diff identical texts', () => {
	// The very same text on both sides must produce no changes at all.
	const text = '123';

	expectDiff( text, text, [] );
} );

describe( 'insertion', () => {
	// Builders for the expected change records.
	const inserted = ( index, text ) => ( { index, type: 'insert', values: text.split( '' ) } );
	const deleted = ( index, howMany ) => ( { index, type: 'delete', howMany } );

	// Each row holds: test title, old text, new text, expected changes and, optionally, `false` when
	// compatibility with 'diffToChanges' should not be checked for the given case.
	const cases = [
		[ 'should diff if old text is empty', '', '123', [ inserted( 0, '123' ) ] ],

		[ 'should diff insertion on the beginning', '123', 'abc123', [ inserted( 0, 'abc' ) ] ],

		// Do not check compatibility with 'diffToChanges' as it generates:
		// [ { index: 0, type: 'insert', values: [ 'a', 'b' ] }, { index: 5, type: 'insert', values: [ 'c', '1', '2', '3' ] } ]
		[
			'should diff insertion on the beginning (repetitive substring)',
			'123', 'ab123c123', [ inserted( 0, 'ab123c' ) ], false
		],

		[ 'should diff insertion on the end', '123', '123abc', [ inserted( 3, 'abc' ) ] ],

		[ 'should diff insertion on the end (repetitive substring)', '123', '123ab123c', [ inserted( 3, 'ab123c' ) ] ],

		[ 'should diff insertion in the middle', '123', '12abc3', [ inserted( 2, 'abc' ) ] ],

		// Do not check compatibility with 'diffToChanges' as it generates:
		// [ { index: 2, type: 'insert', values: [ 'a', 'b', '1', '2' ] }, { index: 7, type: 'insert', values: [ 'c', '3' ] } ]
		[
			'should diff insertion in the middle (repetitive substring)',
			'123', '12ab123c3', [ inserted( 2, 'ab123c' ) ], false
		],

		[ 'should diff insertion of duplicated content', '123', '123123', [ inserted( 3, '123' ) ] ],

		[ 'should diff insertion of partially duplicated content', '123', '12323', [ inserted( 3, '23' ) ] ],

		// Do not check compatibility with 'diffToChanges' as it generates:
		// [ { index: 2, type: 'insert', values: [ 'a', 'b' ] }, { index: 5, type: 'insert', values: [ 'c' ] } ]
		[
			'should diff insertion on both boundaries',
			'123', 'ab123c', [ inserted( 0, 'ab123c' ), deleted( 6, 3 ) ], false
		]
	];

	for ( const [ title, oldText, newText, expected, checkCompatibility ] of cases ) {
		it( title, () => {
			// A missing flag is `undefined`, which makes `expectDiff()` fall back to its default.
			expectDiff( oldText, newText, expected, checkCompatibility );
		} );
	}
} );

describe( 'deletion', () => {
	// Builders for the expected change records.
	const inserted = ( index, text ) => ( { index, type: 'insert', values: text.split( '' ) } );
	const deleted = ( index, howMany ) => ( { index, type: 'delete', howMany } );

	// Each row holds: test title, old text, new text, expected changes and, optionally, `false` when
	// compatibility with 'diffToChanges' should not be checked for the given case.
	const cases = [
		[ 'should diff if new text is empty', '123', '', [ deleted( 0, 3 ) ] ],

		[ 'should diff deletion on the beginning', 'abc123', '123', [ deleted( 0, 3 ) ] ],

		// Do not check compatibility with 'diffToChanges' as it generates:
		// [ { index: 0, type: 'delete', howMany: 2 }, { index: 3, type: 'delete', howMany: 4 } ]
		[
			'should diff deletion on the beginning (repetitive substring)',
			'ab123c123', '123', [ deleted( 0, 6 ) ], false
		],

		[ 'should diff deletion on the end', '123abc', '123', [ deleted( 3, 3 ) ] ],

		[ 'should diff deletion on the end (repetitive substring)', '123ab123c', '123', [ deleted( 3, 6 ) ] ],

		[ 'should diff deletion in the middle', '12abc3', '123', [ deleted( 2, 3 ) ] ],

		// Do not check compatibility with 'diffToChanges' as it generates:
		// [ { index: 2, type: 'delete', howMany: 4 }, { index: 3, type: 'delete', howMany: 2 } ]
		[
			'should diff deletion in the middle (repetitive substring)',
			'12ab123c3', '123', [ deleted( 2, 6 ) ], false
		],

		// Do not check compatibility with 'diffToChanges' as it generates:
		// [ { index: 0, type: 'delete', howMany: 1 }, { index: 3, type: 'delete', howMany: 2 } ]
		[
			'should diff deletion on both boundaries',
			'12abc3', '2ab', [ inserted( 0, '2ab' ), deleted( 3, 6 ) ], false
		],

		[ 'should diff deletion of duplicated content', '123123', '123', [ deleted( 3, 3 ) ] ],

		[ 'should diff deletion of partially duplicated content', '12323', '123', [ deleted( 3, 2 ) ] ],

		// Do not check compatibility with 'diffToChanges' as it generates:
		// [ { index: 1, type: 'delete', howMany: 2 }, { index: 2, type: 'delete', howMany: 1 } ]
		[
			'should diff deletion of partially duplicated content 2',
			'11233', '13', [ deleted( 1, 3 ) ], false
		]
	];

	for ( const [ title, oldText, newText, expected, checkCompatibility ] of cases ) {
		it( title, () => {
			// A missing flag is `undefined`, which makes `expectDiff()` fall back to its default.
			expectDiff( oldText, newText, expected, checkCompatibility );
		} );
	}
} );

// Cases in which a part of the old text is replaced, so both an insertion and a deletion are expected.
describe( 'replacement', () => {
	it( 'should diff replacement of entire text', () => {
		// Do not check compatibility with 'diffToChanges' as it has changes in reverse order ('delete', 'insert') here.
		expectDiff( '12345', 'abcd', [
			{ index: 0, type: 'insert', values: [ 'a', 'b', 'c', 'd' ] },
			{ index: 4, type: 'delete', howMany: 5 }
		], false );
	} );

	it( 'should diff replacement on the beginning', () => {
		expectDiff( '12345', 'abcd345', [
			{ index: 0, type: 'insert', values: [ 'a', 'b', 'c', 'd' ] },
			{ index: 4, type: 'delete', howMany: 2 }
		] );
	} );

	it( 'should diff replacement on the beginning (repetitive substring)', () => {
		// Do not check compatibility with 'diffToChanges' as it has changes in reverse order ('delete', 'insert') here.
		expectDiff( '12345', '345345', [
			{ index: 0, type: 'insert', values: [ '3', '4', '5' ] },
			{ index: 3, type: 'delete', howMany: 2 }
		], false );
	} );

	it( 'should diff replacement on the end', () => {
		// Do not check compatibility with 'diffToChanges' as it has changes in reverse order ('delete', 'insert') here.
		expectDiff( '12345', '12ab', [
			{ index: 2, type: 'insert', values: [ 'a', 'b' ] },
			{ index: 4, type: 'delete', howMany: 3 }
		], false );
	} );

	it( 'should diff replacement on the end (repetitive substring)', () => {
		// Do not check compatibility with 'diffToChanges' as it generates:
		// [ { index: 3, type: 'insert', values: [ '1', '2', '3' ] }, { index: 7, type: 'delete', howMany: 1 } ]
		expectDiff( '12345', '1231234', [
			{ index: 3, type: 'insert', values: [ '1', '2', '3', '4' ] },
			{ index: 7, type: 'delete', howMany: 2 }
		], false );
	} );

	// The two tests below used to share the title 'should diff insertion of duplicated content', which made
	// a failure report ambiguous and mislabelled them as insertions. Each one has its own title now.
	it( 'should diff replacement with duplicated content', () => {
		// Compatibility with 'diffToChanges' is not checked for this case.
		expectDiff( '1234', '123123', [
			{ index: 3, type: 'insert', values: [ '1', '2', '3' ] },
			{ index: 6, type: 'delete', howMany: 1 }
		], false );
	} );

	it( 'should diff replacement with partially duplicated content', () => {
		// Compatibility with 'diffToChanges' is not checked for this case.
		expectDiff( '1234', '13424', [
			{ index: 1, type: 'insert', values: [ '3', '4', '2' ] },
			{ index: 4, type: 'delete', howMany: 2 }
		], false );
	} );

	it( 'should diff replacement in the middle', () => {
		expectDiff( '12345', '12ab5', [
			{ index: 2, type: 'insert', values: [ 'a', 'b' ] },
			{ index: 4, type: 'delete', howMany: 2 }
		] );
	} );

	it( 'should diff replacement in the middle (repetitive substring)', () => {
		// Do not check compatibility with 'diffToChanges' as it generates:
		// [ { index: 2, type: 'insert', values: [ '1', '2' ] }, { index: 7, type: 'insert', values: [ '5' ] } ]
		expectDiff( '12345', '12123455', [
			{ index: 2, type: 'insert', values: [ '1', '2', '3', '4', '5' ] },
			{ index: 7, type: 'delete', howMany: 2 }
		], false );
	} );

	it( 'should diff replacement of duplicated content', () => {
		// Do not check compatibility with 'diffToChanges' as it has changes in reverse order ('delete', 'insert') here.
		expectDiff( '123123', '123333', [
			{ index: 3, type: 'insert', values: '33'.split( '' ) },
			{ index: 5, type: 'delete', howMany: 2 }
		], false );
	} );
} );
} );

// Asserts that `fastDiff( oldText, newText )` returns exactly the `expected` changes.
//
// Unless `checkDiffToChangesCompatibility` is falsy, the result is additionally required to be equal to what
// `diffToChanges( diff( oldText, newText ), newText )` returns for the same texts.
function expectDiff( oldText, newText, expected, checkDiffToChangesCompatibility = true ) {
	const actualChanges = fastDiff( oldText, newText );

	expect( actualChanges ).to.deep.equals( expected );

	if ( !checkDiffToChangesCompatibility ) {
		return;
	}

	const referenceChanges = diffToChanges( diff( oldText, newText ), newText );

	expect( actualChanges ).to.deep.equals( referenceChanges, 'diffToChanges compatibility' );
}