-
Notifications
You must be signed in to change notification settings - Fork 39
/
Copy pathdiff.ts
338 lines (310 loc) · 13.3 KB
/
diff.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
import {Pointer} from './pointer' // we only need this for type inference
import {hasOwnProperty, objectType} from './util'
/**
All diff* functions should return a list of operations, often empty.
Each operation should be an object with two to four fields:
* `op`: the name of the operation; one of "add", "remove", "replace", "move",
"copy", or "test".
* `path`: a JSON pointer string
* `from`: a JSON pointer string
* `value`: a JSON value
The different operations have different arguments.
* "add": [`path`, `value`]
* "remove": [`path`]
* "replace": [`path`, `value`]
* "move": [`from`, `path`]
* "copy": [`from`, `path`]
* "test": [`path`, `value`]
Currently this only really differentiates between Arrays, Objects, and
Everything Else, which is pretty much just what JSON substantially
differentiates between.
*/
export interface AddOperation { op: 'add', path: string, value: any }
export interface RemoveOperation { op: 'remove', path: string }
export interface ReplaceOperation { op: 'replace', path: string, value: any }
export interface MoveOperation { op: 'move', from: string, path: string }
export interface CopyOperation { op: 'copy', from: string, path: string }
export interface TestOperation { op: 'test', path: string, value: any }
export type Operation = AddOperation |
RemoveOperation |
ReplaceOperation |
MoveOperation |
CopyOperation |
TestOperation
export function isDestructive({op}: Operation): boolean {
return op === 'remove' || op === 'replace' || op === 'copy' || op === 'move'
}
export type Diff = (input: any, output: any, ptr: Pointer) => Operation[]
/**
VoidableDiff exists to allow the user to provide a partial diff(...) function,
falling back to the built-in diffAny(...) function if the user-provided function
returns void.
*/
export type VoidableDiff = (input: any, output: any, ptr: Pointer) => Operation[] | void
/**
List the keys in `minuend` that are not in `subtrahend`.
A key is only considered if it is both 1) an own-property (o.hasOwnProperty(k))
of the object, and 2) has a value that is not undefined. This is to match JSON
semantics, where JSON object serialization drops keys with undefined values.
@param minuend Object of interest
@param subtrahend Object of comparison
@returns Array of keys that are in `minuend` but not in `subtrahend`.
*/
export function subtract(minuend: {[index: string]: any}, subtrahend: {[index: string]: any}): string[] {
// initialize empty object; we only care about the keys, the values can be anything
const obj: {[index: string]: number} = {}
// build up obj with all the properties of minuend
for (const add_key in minuend) {
if (hasOwnProperty.call(minuend, add_key) && minuend[add_key] !== undefined) {
obj[add_key] = 1
}
}
// now delete all the properties of subtrahend from obj
// (deleting a missing key has no effect)
for (const del_key in subtrahend) {
if (hasOwnProperty.call(subtrahend, del_key) && subtrahend[del_key] !== undefined) {
delete obj[del_key]
}
}
// finally, extract whatever keys remain in obj
return Object.keys(obj)
}
/**
List the keys that shared by all `objects`.
The semantics of what constitutes a "key" is described in {@link subtract}.
@param objects Array of objects to compare
@returns Array of keys that are in ("own-properties" of) every object in `objects`.
*/
export function intersection(objects: ArrayLike<{[index: string]: any}>): string[] {
const length = objects.length
// prepare empty counter to keep track of how many objects each key occurred in
const counter: {[index: string]: number} = {}
// go through each object and increment the counter for each key in that object
for (let i = 0; i < length; i++) {
const object = objects[i]
for (const key in object) {
if (hasOwnProperty.call(object, key) && object[key] !== undefined) {
counter[key] = (counter[key] || 0) + 1
}
}
}
// now delete all keys from the counter that were not seen in every object
for (const key in counter) {
if (counter[key] < length) {
delete counter[key]
}
}
// finally, extract whatever keys remain in the counter
return Object.keys(counter)
}
interface ArrayAdd { op: 'add', index: number, value: any }
interface ArrayRemove { op: 'remove', index: number }
interface ArrayReplace { op: 'replace', index: number, original: any, value: any }
/** These are not proper Operation objects, but will be converted into
Operation objects eventually. {index} indicates the actual target position,
never 'end-of-array' */
type ArrayOperation = ArrayAdd | ArrayRemove | ArrayReplace
function isArrayAdd(array_operation: ArrayOperation): array_operation is ArrayAdd {
return array_operation.op === 'add'
}
function isArrayRemove(array_operation: ArrayOperation): array_operation is ArrayRemove {
return array_operation.op === 'remove'
}
interface DynamicAlternative {
operations: ArrayOperation[]
/**
cost indicates the total cost of getting to this position.
*/
cost: number
}
function appendArrayOperation(base: DynamicAlternative, operation: ArrayOperation): DynamicAlternative {
return {
// the new operation must be pushed on the end
operations: base.operations.concat(operation),
cost: base.cost + 1,
}
}
/**
Calculate the shortest sequence of operations to get from `input` to `output`,
using a dynamic programming implementation of the Levenshtein distance algorithm.
To get from the input ABC to the output AZ we could just delete all the input
and say "insert A, insert Z" and be done with it. That's what we do if the
input is empty. But we can be smarter.
output
A Z
- -
[0] 1 2
input A | 1 [0] 1
B | 2 [1] 1
C | 3 2 [2]
1) start at 0,0 (+0)
2) keep A (+0)
3) remove B (+1)
4) replace C with Z (+1)
If the `input` (source) is empty, they'll all be in the top row, resulting in an
array of 'add' operations.
If the `output` (target) is empty, everything will be in the left column,
resulting in an array of 'remove' operations.
@returns A list of add/remove/replace operations.
*/
export function diffArrays<T>(input: T[], output: T[], ptr: Pointer, diff: Diff = diffAny): Operation[] {
// set up cost matrix (very simple initialization: just a map)
const memo: {[index: string]: DynamicAlternative} = {
'0,0': {operations: [], cost: 0},
}
/**
Calculate the cheapest sequence of operations required to get from
input.slice(0, i) to output.slice(0, j).
There may be other valid sequences with the same cost, but none cheaper.
@param i The row in the layout above
@param j The column in the layout above
@returns An object containing a list of operations, along with the total cost
of applying them (+1 for each add/remove/replace operation)
*/
function dist(i: number, j: number): DynamicAlternative {
// memoized
const memo_key = `${i},${j}`
let memoized = memo[memo_key]
if (memoized === undefined) {
// TODO: this !diff(...).length usage could/should be lazy
if (i > 0 && j > 0 && !diff(input[i - 1], output[j - 1], ptr.add(String(i - 1))).length) {
// equal (no operations => no cost)
memoized = dist(i - 1, j - 1)
}
else {
const alternatives: DynamicAlternative[] = []
if (i > 0) {
// NOT topmost row
const remove_base = dist(i - 1, j)
const remove_operation: ArrayRemove = {
op: 'remove',
index: i - 1,
}
alternatives.push(appendArrayOperation(remove_base, remove_operation))
}
if (j > 0) {
// NOT leftmost column
const add_base = dist(i, j - 1)
const add_operation: ArrayAdd = {
op: 'add',
index: i - 1,
value: output[j - 1],
}
alternatives.push(appendArrayOperation(add_base, add_operation))
}
if (i > 0 && j > 0) {
// TABLE MIDDLE
// supposing we replaced it, compute the rest of the costs:
const replace_base = dist(i - 1, j - 1)
// okay, the general plan is to replace it, but we can be smarter,
// recursing into the structure and replacing only part of it if
// possible, but to do so we'll need the original value
const replace_operation: ArrayReplace = {
op: 'replace',
index: i - 1,
original: input[i - 1],
value: output[j - 1],
}
alternatives.push(appendArrayOperation(replace_base, replace_operation))
}
// the only other case, i === 0 && j === 0, has already been memoized
// the meat of the algorithm:
// sort by cost to find the lowest one (might be several ties for lowest)
// [4, 6, 7, 1, 2].sort((a, b) => a - b) -> [ 1, 2, 4, 6, 7 ]
const best = alternatives.sort((a, b) => a.cost - b.cost)[0]
memoized = best
}
memo[memo_key] = memoized
}
return memoized
}
// handle weird objects masquerading as Arrays that don't have proper length
// properties by using 0 for everything but positive numbers
const input_length = (isNaN(input.length) || input.length <= 0) ? 0 : input.length
const output_length = (isNaN(output.length) || output.length <= 0) ? 0 : output.length
const array_operations = dist(input_length, output_length).operations
const [padded_operations] = array_operations.reduce<[Operation[], number]>(([operations, padding], array_operation) => {
if (isArrayAdd(array_operation)) {
const padded_index = array_operation.index + 1 + padding
const index_token = padded_index < (input_length + padding) ? String(padded_index) : '-'
const operation = {
op: array_operation.op,
path: ptr.add(index_token).toString(),
value: array_operation.value,
}
// padding++ // maybe only if array_operation.index > -1 ?
return [operations.concat(operation), padding + 1]
}
else if (isArrayRemove(array_operation)) {
const operation = {
op: array_operation.op,
path: ptr.add(String(array_operation.index + padding)).toString(),
}
// padding--
return [operations.concat(operation), padding - 1]
}
else { // replace
const replace_ptr = ptr.add(String(array_operation.index + padding))
const replace_operations = diff(array_operation.original, array_operation.value, replace_ptr)
return [operations.concat(...replace_operations), padding]
}
}, [[], 0])
return padded_operations
}
export function diffObjects(input: any, output: any, ptr: Pointer, diff: Diff = diffAny): Operation[] {
// if a key is in input but not output -> remove it
const operations: Operation[] = []
subtract(input, output).forEach(key => {
operations.push({op: 'remove', path: ptr.add(key).toString()})
})
// if a key is in output but not input -> add it
subtract(output, input).forEach(key => {
operations.push({op: 'add', path: ptr.add(key).toString(), value: output[key]})
})
// if a key is in both, diff it recursively
intersection([input, output]).forEach(key => {
operations.push(...diff(input[key], output[key], ptr.add(key)))
})
return operations
}
/**
`diffAny()` returns an empty array if `input` and `output` are materially equal
(i.e., would produce equivalent JSON); otherwise it produces an array of patches
that would transform `input` into `output`.
> Here, "equal" means that the value at the target location and the
> value conveyed by "value" are of the same JSON type, and that they
> are considered equal by the following rules for that type:
> o strings: are considered equal if they contain the same number of
> Unicode characters and their code points are byte-by-byte equal.
> o numbers: are considered equal if their values are numerically
> equal.
> o arrays: are considered equal if they contain the same number of
> values, and if each value can be considered equal to the value at
> the corresponding position in the other array, using this list of
> type-specific rules.
> o objects: are considered equal if they contain the same number of
> members, and if each member can be considered equal to a member in
> the other object, by comparing their keys (as strings) and their
> values (using this list of type-specific rules).
> o literals (false, true, and null): are considered equal if they are
> the same.
*/
export function diffAny(input: any, output: any, ptr: Pointer, diff: Diff = diffAny): Operation[] {
// strict equality handles literals, numbers, and strings (a sufficient but not necessary cause)
if (input === output) {
return []
}
const input_type = objectType(input)
const output_type = objectType(output)
if (input_type == 'array' && output_type == 'array') {
return diffArrays(input, output, ptr, diff)
}
if (input_type == 'object' && output_type == 'object') {
return diffObjects(input, output, ptr, diff)
}
// at this point we know that input and output are materially different;
// could be array -> object, object -> array, boolean -> undefined,
// number -> string, or some other combination, but nothing that can be split
// up into multiple patches: so `output` must replace `input` wholesale.
return [{op: 'replace', path: ptr.toString(), value: output}]
}