This repository has been archived by the owner on Jun 26, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #238 from ckeditor/t/235
Other: Introduced `fastDiff` diffing function. Closes #235.
- Loading branch information
Showing
2 changed files
with
373 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
/** | ||
* @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved. | ||
* For licensing, see LICENSE.md. | ||
*/ | ||
|
||
/** | ||
* @module utils/fastdiff | ||
*/ | ||
|
||
/**
 * Locates the first and the last position at which the given strings differ and describes everything in between
 * as a set of changes. Applying that set of changes to the input text transforms it into the output text, for example:
 *
 *		fastDiff( '12a', '12xyza' );
 *		// [ { index: 2, type: 'insert', values: [ 'x', 'y', 'z' ] } ]
 *
 *		fastDiff( '12a', '12aa' );
 *		// [ { index: 3, type: 'insert', values: [ 'a' ] } ]
 *
 *		fastDiff( '12xyza', '12a' );
 *		// [ { index: 2, type: 'delete', howMany: 3 } ]
 *
 *		fastDiff( '12aa', '12a' );
 *		// [ { index: 3, type: 'delete', howMany: 1 } ]
 *
 *		fastDiff( '12abc3', '2ab' );
 *		// [ { index: 0, type: 'insert', values: [ '2', 'a', 'b' ] }, { index: 3, type: 'delete', howMany: 6 } ]
 *
 * The returned changes can be applied, in order, to `oldText` to obtain `newText`:
 *
 *		let input = '12abc3';
 *		const output = '2ab';
 *		const changes = fastDiff( input, output );
 *
 *		changes.forEach( change => {
 *			if ( change.type == 'insert' ) {
 *				input = input.substring( 0, change.index ) + change.values.join( '' ) + input.substring( change.index );
 *			} else if ( change.type == 'delete' ) {
 *				input = input.substring( 0, change.index ) + input.substring( change.index + change.howMany );
 *			}
 *		} );
 *
 *		input === output; // -> true
 *
 * The output format of this function is compatible with {@link module:utils/difftochanges~diffToChanges} output format.
 *
 * @param {String} oldText Input string.
 * @param {String} newText Output string.
 * @returns {Array} Array of changes. Empty when both texts are identical.
 */
export default function fastDiff( oldText, newText ) {
	// Identical texts need no changes. This guard also upholds the assumption made by the helpers below
	// that the texts they receive are different.
	if ( oldText === newText ) {
		return [];
	}

	return changeIndexesToChanges( newText, findChangeBoundaryIndexes( oldText, newText ) );
}
|
||
// Determines where the changed part starts and ends in both texts. For example:
//
//		const indexes = findChangeBoundaryIndexes( '1234', '13424' );
//		console.log( indexes ); // { firstIndex: 1, lastIndexOld: 3, lastIndexNew: 4 }
//
// The indexes above mean that the modified part of `oldText` is `1[23]4` and the modified part of `newText`
// is `1[342]4`. Such indexes are enough to build the `insert`/`delete` operations which transform
// the old text into the new one.
//
// Both texts are expected to be different.
//
// @param {String} oldText
// @param {String} newText
// @returns {Object}
// @returns {Number} return.firstIndex Index of the first change (the same in both strings).
// @returns {Number} return.lastIndexOld Index in `oldText` at which the common ending of both texts starts.
// @returns {Number} return.lastIndexNew Index in `newText` at which the common ending of both texts starts.
function findChangeBoundaryIndexes( oldText, newText ) {
	// The first differing position is also the length of the beginning shared by both texts.
	const firstIndex = findFirstDifferenceIndex( oldText, newText );

	// Drop the shared beginning and reverse what is left, so that looking for the last difference becomes
	// looking for the first difference once again. The result tells how many characters, counting from the end,
	// both texts have in common.
	//
	// For example:
	//
	//				initial	->	after cut	->	reversed:
	// oldText:		'321ba'	->	'21ba'		->	'ab12'
	// newText:		'31xba'	->	'1xba'		->	'abx1'
	// common ending length:				->	2
	const commonEndingLength = findFirstDifferenceIndex(
		cutAndReverse( oldText, firstIndex ),
		cutAndReverse( newText, firstIndex )
	);

	// Convert the "counted from the end" value into regular offsets counted from the beginning of each text.
	return {
		firstIndex,
		lastIndexOld: oldText.length - commonEndingLength,
		lastIndexNew: newText.length - commonEndingLength
	};
}
|
||
// Returns the first index at which `oldText` and `newText` differ. When one text is a prefix of the other,
// the length of the shorter text is returned (a missing character never equals an existing one).
//
// @param {String} oldText
// @param {String} newText
// @returns {Number} Index of the first difference. `undefined` only when both texts are identical,
// which callers are expected to rule out beforehand.
function findFirstDifferenceIndex( oldText, newText ) {
	// The bound does not change while iterating, so compute it once instead of on every loop pass.
	const longerLength = Math.max( oldText.length, newText.length );

	for ( let i = 0; i < longerLength; i++ ) {
		if ( oldText[ i ] !== newText[ i ] ) {
			return i;
		}
	}
	// Deliberately no "backup" return: `oldText` and `newText` are assumed to differ, either in content
	// or in length, so the condition above is always met eventually.
}
|
||
// Drops the first `howMany` characters of the given `text` and returns the remaining part written backwards.
//
// @param {String} text Text to be processed.
// @param {Number} howMany How many characters from text beginning to cut.
// @returns {String} Shortened and reversed text.
function cutAndReverse( text, howMany ) {
	const remainder = text.substring( howMany );
	let reversed = '';

	// Walk the remaining part from its last character to the first one.
	for ( let i = remainder.length - 1; i >= 0; i-- ) {
		reversed += remainder[ i ];
	}

	return reversed;
}
|
||
// Converts change indexes returned by `findChangeBoundaryIndexes` into an array of changes. The array contains
// 0 (no changes), 1 (deletion or insertion) or 2 records (insertion followed by deletion).
//
// The 'insert', 'delete' order is used to stay compatible with {@link module:utils/difftochanges~diffToChanges}
// in most cases. However, 'diffToChanges' does not stick to any order, so in some cases
// (for example replacing '12345' with 'abcd') it will generate the 'delete', 'insert' order.
//
// @param {String} newText New text for which change indexes were calculated.
// @param {Object} changeIndexes Change indexes object from `findChangeBoundaryIndexes` function.
// @returns {Array.<Object>} Array of changes compatible with {@link module:utils/difftochanges~diffToChanges} format.
function changeIndexesToChanges( newText, changeIndexes ) {
	const { firstIndex, lastIndexOld, lastIndexNew } = changeIndexes;
	const insertedCount = lastIndexNew - firstIndex;
	const deletedCount = lastIndexOld - firstIndex;
	const changes = [];

	if ( insertedCount > 0 ) {
		changes.push( {
			index: firstIndex,
			type: 'insert',
			values: newText.substring( firstIndex, lastIndexNew ).split( '' )
		} );
	}

	if ( deletedCount > 0 ) {
		changes.push( {
			// The insertion (if any) is applied first, so the characters to remove start right after it.
			index: firstIndex + insertedCount,
			type: 'delete',
			howMany: deletedCount
		} );
	}

	return changes;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
/** | ||
* @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved. | ||
* For licensing, see LICENSE.md. | ||
*/ | ||
|
||
import fastDiff from '../src/fastdiff'; | ||
import diff from '../src/diff'; | ||
import diffToChanges from '../src/difftochanges'; | ||
|
||
// Test suite for `fastDiff`. Each case goes through the `expectDiff` helper which compares the result with
// the expected array of changes and, unless its last argument is `false`, also with the `diffToChanges` output.
describe( 'fastDiff', () => {
	it( 'should diff identical texts', () => {
		expectDiff( '123', '123', [] );
	} );

	describe( 'insertion', () => {
		it( 'should diff if old text is empty', () => {
			expectDiff( '', '123', [ { index: 0, type: 'insert', values: [ '1', '2', '3' ] } ] );
		} );

		it( 'should diff insertion on the beginning', () => {
			expectDiff( '123', 'abc123', [ { index: 0, type: 'insert', values: [ 'a', 'b', 'c' ] } ] );
		} );

		it( 'should diff insertion on the beginning (repetitive substring)', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 0, type: 'insert', values: [ 'a', 'b' ] }, { index: 5, type: 'insert', values: [ 'c', '1', '2', '3' ] } ]
			expectDiff( '123', 'ab123c123', [ { index: 0, type: 'insert', values: [ 'a', 'b', '1', '2', '3', 'c' ] } ], false );
		} );

		it( 'should diff insertion on the end', () => {
			expectDiff( '123', '123abc', [ { index: 3, type: 'insert', values: [ 'a', 'b', 'c' ] } ] );
		} );

		it( 'should diff insertion on the end (repetitive substring)', () => {
			expectDiff( '123', '123ab123c', [ { index: 3, type: 'insert', values: [ 'a', 'b', '1', '2', '3', 'c' ] } ] );
		} );

		it( 'should diff insertion in the middle', () => {
			expectDiff( '123', '12abc3', [ { index: 2, type: 'insert', values: [ 'a', 'b', 'c' ] } ] );
		} );

		it( 'should diff insertion in the middle (repetitive substring)', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 2, type: 'insert', values: [ 'a', 'b', '1', '2' ] }, { index: 7, type: 'insert', values: [ 'c', '3' ] } ]
			expectDiff( '123', '12ab123c3', [ { index: 2, type: 'insert', values: [ 'a', 'b', '1', '2', '3', 'c' ] } ], false );
		} );

		it( 'should diff insertion of duplicated content', () => {
			expectDiff( '123', '123123', [ { index: 3, type: 'insert', values: [ '1', '2', '3' ] } ] );
		} );

		it( 'should diff insertion of partially duplicated content', () => {
			expectDiff( '123', '12323', [ { index: 3, type: 'insert', values: [ '2', '3' ] } ] );
		} );

		it( 'should diff insertion on both boundaries', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 2, type: 'insert', values: [ 'a', 'b' ] }, { index: 5, type: 'insert', values: [ 'c' ] } ]
			expectDiff( '123', 'ab123c', [
				{ index: 0, type: 'insert', values: [ 'a', 'b', '1', '2', '3', 'c' ] },
				{ index: 6, type: 'delete', howMany: 3 }
			], false );
		} );
	} );

	describe( 'deletion', () => {
		it( 'should diff if new text is empty', () => {
			expectDiff( '123', '', [ { index: 0, type: 'delete', howMany: 3 } ] );
		} );

		it( 'should diff deletion on the beginning', () => {
			expectDiff( 'abc123', '123', [ { index: 0, type: 'delete', howMany: 3 } ] );
		} );

		it( 'should diff deletion on the beginning (repetitive substring)', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 0, type: 'delete', howMany: 2 }, { index: 3, type: 'delete', howMany: 4 } ]
			expectDiff( 'ab123c123', '123', [ { index: 0, type: 'delete', howMany: 6 } ], false );
		} );

		it( 'should diff deletion on the end', () => {
			expectDiff( '123abc', '123', [ { index: 3, type: 'delete', howMany: 3 } ] );
		} );

		it( 'should diff deletion on the end (repetitive substring)', () => {
			expectDiff( '123ab123c', '123', [ { index: 3, type: 'delete', howMany: 6 } ] );
		} );

		it( 'should diff deletion in the middle', () => {
			expectDiff( '12abc3', '123', [ { index: 2, type: 'delete', howMany: 3 } ] );
		} );

		it( 'should diff deletion in the middle (repetitive substring)', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 2, type: 'delete', howMany: 4 }, { index: 3, type: 'delete', howMany: 2 } ]
			expectDiff( '12ab123c3', '123', [ { index: 2, type: 'delete', howMany: 6 } ], false );
		} );

		it( 'should diff deletion on both boundaries', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 0, type: 'delete', howMany: 1 }, { index: 3, type: 'delete', howMany: 2 } ]
			expectDiff( '12abc3', '2ab', [
				{ index: 0, type: 'insert', values: [ '2', 'a', 'b' ] },
				{ index: 3, type: 'delete', howMany: 6 }
			], false );
		} );

		it( 'should diff deletion of duplicated content', () => {
			expectDiff( '123123', '123', [ { index: 3, type: 'delete', howMany: 3 } ] );
		} );

		it( 'should diff deletion of partially duplicated content', () => {
			expectDiff( '12323', '123', [ { index: 3, type: 'delete', howMany: 2 } ] );
		} );

		it( 'should diff deletion of partially duplicated content 2', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 1, type: 'delete', howMany: 2 }, { index: 2, type: 'delete', howMany: 1 } ]
			expectDiff( '11233', '13', [ { index: 1, type: 'delete', howMany: 3 } ], false );
		} );
	} );

	describe( 'replacement', () => {
		it( 'should diff replacement of entire text', () => {
			// Do not check compatibility with 'diffToChanges' as it has changes in reverse order ('delete', 'insert') here.
			expectDiff( '12345', 'abcd', [
				{ index: 0, type: 'insert', values: [ 'a', 'b', 'c', 'd' ] },
				{ index: 4, type: 'delete', howMany: 5 }
			], false );
		} );

		it( 'should diff replacement on the beginning', () => {
			expectDiff( '12345', 'abcd345', [
				{ index: 0, type: 'insert', values: [ 'a', 'b', 'c', 'd' ] },
				{ index: 4, type: 'delete', howMany: 2 }
			] );
		} );

		it( 'should diff replacement on the beginning (repetitive substring)', () => {
			// Do not check compatibility with 'diffToChanges' as it has changes in reverse order ('delete', 'insert') here.
			expectDiff( '12345', '345345', [
				{ index: 0, type: 'insert', values: [ '3', '4', '5' ] },
				{ index: 3, type: 'delete', howMany: 2 }
			], false );
		} );

		it( 'should diff replacement on the end', () => {
			// Do not check compatibility with 'diffToChanges' as it has changes in reverse order ('delete', 'insert') here.
			expectDiff( '12345', '12ab', [
				{ index: 2, type: 'insert', values: [ 'a', 'b' ] },
				{ index: 4, type: 'delete', howMany: 3 }
			], false );
		} );

		it( 'should diff replacement on the end (repetitive substring)', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 3, type: 'insert', values: [ '1', '2', '3' ] }, { index: 7, type: 'delete', howMany: 1 } ]
			expectDiff( '12345', '1231234', [
				{ index: 3, type: 'insert', values: [ '1', '2', '3', '4' ] },
				{ index: 7, type: 'delete', howMany: 2 }
			], false );
		} );

		// The two tests below used to share one title (also used in the 'insertion' group), which made
		// their results indistinguishable in test reports.
		it( 'should diff replacement on the end with duplicated content', () => {
			expectDiff( '1234', '123123', [
				{ index: 3, type: 'insert', values: [ '1', '2', '3' ] },
				{ index: 6, type: 'delete', howMany: 1 }
			], false );
		} );

		it( 'should diff replacement in the middle with partially duplicated content', () => {
			expectDiff( '1234', '13424', [
				{ index: 1, type: 'insert', values: [ '3', '4', '2' ] },
				{ index: 4, type: 'delete', howMany: 2 }
			], false );
		} );

		it( 'should diff replacement in the middle', () => {
			expectDiff( '12345', '12ab5', [
				{ index: 2, type: 'insert', values: [ 'a', 'b' ] },
				{ index: 4, type: 'delete', howMany: 2 }
			] );
		} );

		it( 'should diff replacement in the middle (repetitive substring)', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 2, type: 'insert', values: [ '1', '2' ] }, { index: 7, type: 'insert', values: [ '5' ] } ]
			expectDiff( '12345', '12123455', [
				{ index: 2, type: 'insert', values: [ '1', '2', '3', '4', '5' ] },
				{ index: 7, type: 'delete', howMany: 2 }
			], false );
		} );

		it( 'should diff replacement of duplicated content', () => {
			// Do not check compatibility with 'diffToChanges' as it has changes in reverse order ('delete', 'insert') here.
			expectDiff( '123123', '123333', [
				{ index: 3, type: 'insert', values: '33'.split( '' ) },
				{ index: 5, type: 'delete', howMany: 2 }
			], false );
		} );
	} );
} );
|
||
// Asserts that `fastDiff( oldText, newText )` returns the `expected` array of changes and, optionally, that
// the same array is produced by running `diffToChanges` on the `diff` output for these texts.
//
// @param {String} oldText
// @param {String} newText
// @param {Array.<Object>} expected Expected `fastDiff` result.
// @param {Boolean} [checkDiffToChangesCompatibility=true] Whether to also compare the result with `diffToChanges`.
function expectDiff( oldText, newText, expected, checkDiffToChangesCompatibility = true ) {
	const actualChanges = fastDiff( oldText, newText );

	expect( actualChanges ).to.deep.equals( expected );

	if ( !checkDiffToChangesCompatibility ) {
		return;
	}

	const referenceChanges = diffToChanges( diff( oldText, newText ), newText );

	expect( actualChanges ).to.deep.equals( referenceChanges, 'diffToChanges compatibility' );
}