This repository has been archived by the owner on Jun 26, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 12
Merged
Changes from 5 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
4423c4c
Introduced 'fastDiff' function.
f1ames 2458006
Tests: 'fastDiff' unit tests.
f1ames d867925
Changed: Refactored `fastDiff`.
scofalik f72bf11
Changed: Refactored `fastDiff`.
scofalik f021fb1
Docs: Improved docs.
scofalik e341cb6
Docs adjustments.
f1ames 47b2c4f
Merge branch 'master' into t/235
f1ames File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
/** | ||
* @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved. | ||
* For licensing, see LICENSE.md. | ||
*/ | ||
|
||
/** | ||
* @module utils/fastdiff | ||
*/ | ||
|
||
/**
 * Finds position of the first and last change in the given strings and generates set of changes. Set of changes
 * can be applied to the input text in order to transform it into the output text, for example:
 *
 *		fastDiff( '12a', '12xyza' );
 *		// [ { index: 2, type: 'insert', values: [ 'x', 'y', 'z' ] } ]
 *
 *		fastDiff( '12a', '12aa' );
 *		// [ { index: 3, type: 'insert', values: [ 'a' ] } ]
 *
 *		fastDiff( '12xyza', '12a' );
 *		// [ { index: 2, type: 'delete', howMany: 3 } ]
 *
 *		fastDiff( '12aa', '12a' );
 *		// [ { index: 3, type: 'delete', howMany: 1 } ]
 *
 *		fastDiff( '12abc3', '2ab' );
 *		// [ { index: 0, type: 'insert', values: [ '2', 'a', 'b' ] }, { index: 3, type: 'delete', howMany: 6 } ]
 *
 * Using returned results you can modify `oldText` to make it the same as the `newText`:
 *
 *		let input = '12abc3';
 *		const output = '2ab';
 *		const changes = fastDiff( input, output );
 *
 *		changes.forEach( change => {
 *			if ( change.type == 'insert' ) {
 *				input = input.substring( 0, change.index ) + change.values.join( '' ) + input.substring( change.index );
 *			} else if ( change.type == 'delete' ) {
 *				input = input.substring( 0, change.index ) + input.substring( change.index + change.howMany );
 *			}
 *		} );
 *
 *		input == output; // -> true
 *
 * The output format of this function is compatible with {@link module:utils/difftochanges~diffToChanges} output format.
 *
 * @param {String} oldText Input string.
 * @param {String} newText Input string.
 * @returns {Array} Array of changes. Empty when both texts are identical, otherwise at most one `insert`
 * record followed by at most one `delete` record.
 */
export default function fastDiff( oldText, newText ) {
	// Identical texts need no changes. Returning early also guarantees that
	// `findChangeBoundaryIndexes()` is only ever called with texts that differ.
	if ( oldText === newText ) {
		return [];
	}

	const changeIndexes = findChangeBoundaryIndexes( oldText, newText );

	return changeIndexesToChanges( newText, changeIndexes );
}
|
||
// Finds position of the first and last change in the given strings. For example: | ||
// | ||
// const indexes = findChangeBoundaryIndexes( '1234', '13424' ); | ||
// console.log( indexes ); // { firstIndex: 1, lastIndexOld: 3, lastIndexNew: 4 } | ||
// | ||
// The above indexes means that in `oldText` modified part is `1[23]4` and in the `newText` it is `1[342]4`. | ||
// Based on such indexes, array with `insert`/`delete` operations which allows transforming | ||
// old text to the new one could be generated. | ||
// | ||
// It is expected that `oldText` and `newText` are different. | ||
// | ||
// @param {String} oldText | ||
// @param {String} newText | ||
// @returns {Object} | ||
// @returns {Number} return.firstIndex Index of the first change in both strings (always the same for both). | ||
// @returns {Number} return.lastIndexOld Index in `oldText` of the first character of the common suffix, i.e. the position just after the last changed character. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it should be:
or
Because looking for the last common character starting from the back, means you are looking for the first common character basically. |
||
// @returns {Number} return.lastIndexNew Index in `newText` of the first character of the common suffix, i.e. the position just after the last changed character. | ||
function findChangeBoundaryIndexes( oldText, newText ) {
	// Length of the common prefix, which is also the index of the first difference between texts.
	const firstIndex = findFirstDifferenceIndex( oldText, newText );

	// Remove the common prefix and reverse what is left, so that looking for the last difference
	// becomes looking for the first difference again. Cutting the prefix first prevents the same
	// characters from being counted both as a common prefix and as a common suffix.
	const oldTextReversed = cutAndReverse( oldText, firstIndex );
	const newTextReversed = cutAndReverse( newText, firstIndex );

	// Find the first difference between reversed texts.
	// It should be treated as "how many characters from the end the last difference occurred".
	//
	// For example:
	//
	//				initial	->	after cut	-> reversed:
	// oldText:		'321ba'	->	'21ba'		-> 'ab12'
	// newText:		'31xba'	->	'1xba'		-> 'abx1'
	// lastIndex:							-> 2
	//
	// So the last change occurred two characters from the end of the texts.
	const lastIndex = findFirstDifferenceIndex( oldTextReversed, newTextReversed );

	// `lastIndex` counts from the end, so convert it to offsets counted from the beginning.
	// Both texts share the same suffix length but may have different total lengths,
	// hence a separate offset for each of them.
	const lastIndexOld = oldText.length - lastIndex;
	const lastIndexNew = newText.length - lastIndex;

	return { firstIndex, lastIndexOld, lastIndexNew };
}
|
||
// Returns the first index at which `oldText` and `newText` differ.
//
// If one text is a prefix of the other, the length of the shorter text is returned (that is the first index
// present in only one of them). If both texts are identical, their length is returned, so callers always get
// a number and never `undefined`.
//
// @param {String} oldText
// @param {String} newText
// @returns {Number} Number of leading characters shared by both texts.
function findFirstDifferenceIndex( oldText, newText ) {
	// Only indexes present in both texts need comparing; computed once instead of on every iteration.
	const commonLength = Math.min( oldText.length, newText.length );

	for ( let i = 0; i < commonLength; i++ ) {
		if ( oldText[ i ] !== newText[ i ] ) {
			return i;
		}
	}

	// No difference within the shared range: the texts differ only in length or are identical.
	return commonLength;
}
|
||
// Removes `cutHowMany` first characters from the given `text` string and then reverses it and returns it.
//
// The reversal deliberately works on UTF-16 code units (`split( '' )`), the same units that string indexes and
// `length` are expressed in, so an index found in the reversed text maps directly to an offset from the end
// of the original text.
//
// @param {String} text
// @param {Number} cutHowMany Number of leading characters to drop before reversing.
// @returns {String} Reversed remainder of `text`; an empty string if `cutHowMany` is not smaller than its length.
function cutAndReverse( text, cutHowMany ) {
	const remainder = text.substring( cutHowMany );

	return remainder.split( '' ).reverse().join( '' );
}
|
||
// Generates changes array based on change indexes from `findChangeBoundaryIndexes` function. This function will
// generate array with 0 (no changes), 1 (deletion or insertion) or 2 records (insertion and deletion).
//
// @param {String} newText New text for which change indexes were calculated.
// @param {Object} changeIndexes Change indexes object from `findChangeBoundaryIndexes` function.
// @param {Number} changeIndexes.firstIndex Index of the first change (the same in both texts).
// @param {Number} changeIndexes.lastIndexOld Index just after the last changed character in the old text.
// @param {Number} changeIndexes.lastIndexNew Index just after the last changed character in the new text.
// @returns {Array.<Object>} Array of changes compatible with {@link module:utils/difftochanges~diffToChanges} format.
function changeIndexesToChanges( newText, changeIndexes ) {
	const result = [];
	const { firstIndex, lastIndexOld, lastIndexNew } = changeIndexes;

	// Lengths of the changed ranges in the new and in the old text.
	const insertedCount = lastIndexNew - firstIndex;
	const deletedCount = lastIndexOld - firstIndex;

	// Order operations as 'insert', 'delete' array to keep compatibility with {@link module:utils/difftochanges~diffToChanges}
	// in most cases. However, 'diffToChanges' does not stick to any order so in some cases
	// (for example replacing '12345' with 'abcd') it will generate 'delete', 'insert' order.
	if ( insertedCount > 0 ) {
		result.push( {
			index: firstIndex,
			type: 'insert',
			values: newText.substring( firstIndex, lastIndexNew ).split( '' )
		} );
	}

	if ( deletedCount > 0 ) {
		result.push( {
			// The deletion is applied after the insertion, so the characters to remove have been
			// shifted right by everything that was inserted and now start at `lastIndexNew`.
			index: lastIndexNew,
			type: 'delete',
			howMany: deletedCount
		} );
	}

	return result;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
/** | ||
* @license Copyright (c) 2003-2018, CKSource - Frederico Knabben. All rights reserved. | ||
* For licensing, see LICENSE.md. | ||
*/ | ||
|
||
import fastDiff from '../src/fastdiff'; | ||
import diff from '../src/diff'; | ||
import diffToChanges from '../src/difftochanges'; | ||
|
||
// Unit tests for `fastDiff()`. Each case goes through `expectDiff()`, which checks the exact list of changes
// and, unless the last argument is `false`, also checks that the result equals the `diff()` + `diffToChanges()` output.
describe( 'fastDiff', () => {
	it( 'should diff identical texts', () => {
		expectDiff( '123', '123', [] );
	} );

	describe( 'insertion', () => {
		it( 'should diff if old text is empty', () => {
			expectDiff( '', '123', [ { index: 0, type: 'insert', values: [ '1', '2', '3' ] } ] );
		} );

		it( 'should diff insertion on the beginning', () => {
			expectDiff( '123', 'abc123', [ { index: 0, type: 'insert', values: [ 'a', 'b', 'c' ] } ] );
		} );

		it( 'should diff insertion on the beginning (repetitive substring)', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 0, type: 'insert', values: [ 'a', 'b' ] }, { index: 5, type: 'insert', values: [ 'c', '1', '2', '3' ] } ]
			expectDiff( '123', 'ab123c123', [ { index: 0, type: 'insert', values: [ 'a', 'b', '1', '2', '3', 'c' ] } ], false );
		} );

		it( 'should diff insertion on the end', () => {
			expectDiff( '123', '123abc', [ { index: 3, type: 'insert', values: [ 'a', 'b', 'c' ] } ] );
		} );

		it( 'should diff insertion on the end (repetitive substring)', () => {
			expectDiff( '123', '123ab123c', [ { index: 3, type: 'insert', values: [ 'a', 'b', '1', '2', '3', 'c' ] } ] );
		} );

		it( 'should diff insertion in the middle', () => {
			expectDiff( '123', '12abc3', [ { index: 2, type: 'insert', values: [ 'a', 'b', 'c' ] } ] );
		} );

		it( 'should diff insertion in the middle (repetitive substring)', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 2, type: 'insert', values: [ 'a', 'b', '1', '2' ] }, { index: 7, type: 'insert', values: [ 'c', '3' ] } ]
			expectDiff( '123', '12ab123c3', [ { index: 2, type: 'insert', values: [ 'a', 'b', '1', '2', '3', 'c' ] } ], false );
		} );

		it( 'should diff insertion of duplicated content', () => {
			expectDiff( '123', '123123', [ { index: 3, type: 'insert', values: [ '1', '2', '3' ] } ] );
		} );

		it( 'should diff insertion of partially duplicated content', () => {
			expectDiff( '123', '12323', [ { index: 3, type: 'insert', values: [ '2', '3' ] } ] );
		} );

		it( 'should diff insertion on both boundaries', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 2, type: 'insert', values: [ 'a', 'b' ] }, { index: 5, type: 'insert', values: [ 'c' ] } ]
			expectDiff( '123', 'ab123c', [
				{ index: 0, type: 'insert', values: [ 'a', 'b', '1', '2', '3', 'c' ] },
				{ index: 6, type: 'delete', howMany: 3 }
			], false );
		} );
	} );

	describe( 'deletion', () => {
		it( 'should diff if new text is empty', () => {
			expectDiff( '123', '', [ { index: 0, type: 'delete', howMany: 3 } ] );
		} );

		it( 'should diff deletion on the beginning', () => {
			expectDiff( 'abc123', '123', [ { index: 0, type: 'delete', howMany: 3 } ] );
		} );

		it( 'should diff deletion on the beginning (repetitive substring)', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 0, type: 'delete', howMany: 2 }, { index: 3, type: 'delete', howMany: 4 } ]
			expectDiff( 'ab123c123', '123', [ { index: 0, type: 'delete', howMany: 6 } ], false );
		} );

		it( 'should diff deletion on the end', () => {
			expectDiff( '123abc', '123', [ { index: 3, type: 'delete', howMany: 3 } ] );
		} );

		it( 'should diff deletion on the end (repetitive substring)', () => {
			expectDiff( '123ab123c', '123', [ { index: 3, type: 'delete', howMany: 6 } ] );
		} );

		it( 'should diff deletion in the middle', () => {
			expectDiff( '12abc3', '123', [ { index: 2, type: 'delete', howMany: 3 } ] );
		} );

		it( 'should diff deletion in the middle (repetitive substring)', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 2, type: 'delete', howMany: 4 }, { index: 3, type: 'delete', howMany: 2 } ]
			expectDiff( '12ab123c3', '123', [ { index: 2, type: 'delete', howMany: 6 } ], false );
		} );

		it( 'should diff deletion on both boundaries', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 0, type: 'delete', howMany: 1 }, { index: 3, type: 'delete', howMany: 2 } ]
			expectDiff( '12abc3', '2ab', [
				{ index: 0, type: 'insert', values: [ '2', 'a', 'b' ] },
				{ index: 3, type: 'delete', howMany: 6 }
			], false );
		} );

		it( 'should diff deletion of duplicated content', () => {
			expectDiff( '123123', '123', [ { index: 3, type: 'delete', howMany: 3 } ] );
		} );

		it( 'should diff deletion of partially duplicated content', () => {
			expectDiff( '12323', '123', [ { index: 3, type: 'delete', howMany: 2 } ] );
		} );

		it( 'should diff deletion of partially duplicated content 2', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 1, type: 'delete', howMany: 2 }, { index: 2, type: 'delete', howMany: 1 } ]
			expectDiff( '11233', '13', [ { index: 1, type: 'delete', howMany: 3 } ], false );
		} );
	} );

	describe( 'replacement', () => {
		it( 'should diff replacement of entire text', () => {
			// Do not check compatibility with 'diffToChanges' as it has changes in reverse order ('delete', 'insert') here.
			expectDiff( '12345', 'abcd', [
				{ index: 0, type: 'insert', values: [ 'a', 'b', 'c', 'd' ] },
				{ index: 4, type: 'delete', howMany: 5 }
			], false );
		} );

		it( 'should diff replacement on the beginning', () => {
			expectDiff( '12345', 'abcd345', [
				{ index: 0, type: 'insert', values: [ 'a', 'b', 'c', 'd' ] },
				{ index: 4, type: 'delete', howMany: 2 }
			] );
		} );

		it( 'should diff replacement on the beginning (repetitive substring)', () => {
			// Do not check compatibility with 'diffToChanges' as it has changes in reverse order ('delete', 'insert') here.
			expectDiff( '12345', '345345', [
				{ index: 0, type: 'insert', values: [ '3', '4', '5' ] },
				{ index: 3, type: 'delete', howMany: 2 }
			], false );
		} );

		it( 'should diff replacement on the end', () => {
			// Do not check compatibility with 'diffToChanges' as it has changes in reverse order ('delete', 'insert') here.
			expectDiff( '12345', '12ab', [
				{ index: 2, type: 'insert', values: [ 'a', 'b' ] },
				{ index: 4, type: 'delete', howMany: 3 }
			], false );
		} );

		it( 'should diff replacement on the end (repetitive substring)', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 3, type: 'insert', values: [ '1', '2', '3' ] }, { index: 7, type: 'delete', howMany: 1 } ]
			expectDiff( '12345', '1231234', [
				{ index: 3, type: 'insert', values: [ '1', '2', '3', '4' ] },
				{ index: 7, type: 'delete', howMany: 2 }
			], false );
		} );

		// The two tests below used to share one title (also used in the 'insertion' group),
		// which made them indistinguishable in test reports.
		it( 'should diff replacement with duplicated content', () => {
			expectDiff( '1234', '123123', [
				{ index: 3, type: 'insert', values: [ '1', '2', '3' ] },
				{ index: 6, type: 'delete', howMany: 1 }
			], false );
		} );

		it( 'should diff replacement with partially duplicated content', () => {
			expectDiff( '1234', '13424', [
				{ index: 1, type: 'insert', values: [ '3', '4', '2' ] },
				{ index: 4, type: 'delete', howMany: 2 }
			], false );
		} );

		it( 'should diff replacement in the middle', () => {
			expectDiff( '12345', '12ab5', [
				{ index: 2, type: 'insert', values: [ 'a', 'b' ] },
				{ index: 4, type: 'delete', howMany: 2 }
			] );
		} );

		it( 'should diff replacement in the middle (repetitive substring)', () => {
			// Do not check compatibility with 'diffToChanges' as it generates:
			// [ { index: 2, type: 'insert', values: [ '1', '2' ] }, { index: 7, type: 'insert', values: [ '5' ] } ]
			expectDiff( '12345', '12123455', [
				{ index: 2, type: 'insert', values: [ '1', '2', '3', '4', '5' ] },
				{ index: 7, type: 'delete', howMany: 2 }
			], false );
		} );

		it( 'should diff replacement of duplicated content', () => {
			// Do not check compatibility with 'diffToChanges' as it has changes in reverse order ('delete', 'insert') here.
			expectDiff( '123123', '123333', [
				{ index: 3, type: 'insert', values: '33'.split( '' ) },
				{ index: 5, type: 'delete', howMany: 2 }
			], false );
		} );
	} );
} );
|
||
// Asserts that `fastDiff( oldText, newText )` returns exactly the `expected` list of changes.
//
// @param {String} oldText Text to diff from.
// @param {String} newText Text to diff to.
// @param {Array.<Object>} expected Changes which `fastDiff()` is expected to return.
// @param {Boolean} [checkDiffToChangesCompatibility=true] When `true`, additionally asserts that the result
// deep-equals `diffToChanges( diff( oldText, newText ), newText )`. Pass `false` for inputs where both
// algorithms legitimately produce different (but equally valid) sets of changes.
function expectDiff( oldText, newText, expected, checkDiffToChangesCompatibility = true ) {
	const result = fastDiff( oldText, newText );

	expect( result ).to.deep.equals( expected );

	if ( checkDiffToChangesCompatibility ) {
		expect( result ).to.deep.equals( diffToChanges( diff( oldText, newText ), newText ), 'diffToChanges compatibility' );
	}
}
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it return correct values if strings are the same? If not, then I'd add a note that it is assumed that the strings are different.