Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for using cell ID in diffing and merging #639

Merged
merged 35 commits into from
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
7eb83fb
Add DiffConfig
vidartf Nov 22, 2022
028b618
Optimize source diffing
vidartf Nov 22, 2022
89dfb23
Optimize mime diff
vidartf Nov 22, 2022
496df57
Skip diffing if string are equal
vidartf Nov 22, 2022
4f71a5f
Add ID check in cell comparison
vidartf Nov 22, 2022
1b318e6
Fix source string diffing
vidartf Nov 22, 2022
17caf86
Fix tests for cell IDs
vidartf Nov 22, 2022
7adf55c
Fix for generating deterministic cell IDs in test
vidartf Nov 22, 2022
a2fe203
Fix union strategy
vidartf Nov 22, 2022
f3484e9
Re-enable interleaving test
vidartf Nov 22, 2022
28936ff
Have cell prettyprint include cell ID
vidartf Nov 23, 2022
0de0827
Fix deprecation warning
vidartf Nov 23, 2022
17c105a
Some notebooks with cell ids for sanity check
vidartf Nov 23, 2022
f21c125
Tweak id pretty print
vidartf Jan 8, 2023
b00f951
Fix variable scope issue for string merging
vidartf Jul 7, 2023
e530d81
Comment copy-paste update
vidartf Jul 7, 2023
29cb8ef
Change the logic of cell ID comparison
vidartf Jul 7, 2023
7b57f97
Add some more cases in cell ID test files
vidartf Jul 7, 2023
a2ff4fe
Improve tests
vidartf Jul 7, 2023
7973968
Expose cell ID in the UI
krassowski Jun 10, 2023
7baf832
Add "Cell ID" prefix
krassowski Aug 13, 2023
591548f
Update Playwright Snapshots
github-actions[bot] Oct 27, 2023
dc6d5d7
Update Playwright Snapshots
github-actions[bot] Oct 27, 2023
51b0571
Add capability to output merge decisions to file
vidartf Oct 6, 2023
70670f7
whitelist -> allowlist
vidartf Oct 6, 2023
a6b7b65
Add bare minimum cell ID merge conflict support
vidartf Oct 27, 2023
15e6be4
Better handle merge processing error
vidartf Oct 27, 2023
37e2fb2
useful command to build all ts
vidartf Oct 27, 2023
9630642
Update tests with new behavior
vidartf Oct 30, 2023
cc0be95
Update Playwright Snapshots
github-actions[bot] Oct 30, 2023
e8e2dff
Update Playwright Snapshots
github-actions[bot] Oct 30, 2023
e247fea
Remove ID conflict for UI test merge_test1
vidartf Nov 1, 2023
efbcdff
Update Playwright Snapshots
github-actions[bot] Nov 1, 2023
08636b8
Update Playwright Snapshots
github-actions[bot] Nov 1, 2023
9884381
Fix mergeview gutter click buttons
vidartf Nov 1, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions nbdime/diffing/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@

class DiffConfig:
"""Set of predicates/differs/other configs to pass around"""

def __init__(self, *, predicates=None, differs=None, atomic_paths=None):
if predicates is None:
from .generic import default_predicates
predicates = default_predicates()

if differs is None:
from .generic import default_differs
differs = default_differs()

self.predicates = predicates
self.differs = differs
self._atomic_paths = atomic_paths or {}

def diff_item_at_path(self, a, b, path):
"""Calculate the diff for path."""
self.differs[path](a, b, path=path, config=self)

Check warning on line 20 in nbdime/diffing/config.py

View check run for this annotation

Codecov / codecov/patch

nbdime/diffing/config.py#L20

Added line #L20 was not covered by tests

def is_atomic(self, x, path=None):
"Return True for values that diff should treat as a single atomic value."
try:
return self._atomic_paths[path]
except KeyError:
return not isinstance(x, (str, list, dict))

def __copy__(self):
return DiffConfig(
predicates=self.predicates.copy(),
differs=self.differs.copy(),
atomic_paths=self._atomic_paths.copy(),
)
77 changes: 38 additions & 39 deletions nbdime/diffing/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,13 @@
from ..diff_format import SequenceDiffBuilder, MappingDiffBuilder, validate_diff
from ..diff_utils import count_consumed_symbols

from .config import DiffConfig
from .sequences import diff_strings_linewise, diff_sequence
from .snakes import compute_snakes_multilevel, compute_diff_from_snakes

__all__ = ["diff"]


def is_atomic(x):
"Return True for values that diff should treat as a single atomic value."
return not isinstance(x, (str, list, dict))


def default_predicates():
return defaultdict(lambda: (operator.__eq__,))

Expand Down Expand Up @@ -74,18 +70,16 @@
return s.ratio() > threshold


def diff(a, b, path="", predicates=None, differs=None):
def diff(a, b, path="", config=None):
"Compute the diff of two json-like objects, list or dict or string."

if predicates is None:
predicates = default_predicates()
if differs is None:
differs = default_differs()
if config is None:
config = DiffConfig()

if isinstance(a, list) and isinstance(b, list):
d = diff_lists(a, b, path=path, predicates=predicates, differs=differs)
d = diff_lists(a, b, path=path, config=config)
elif isinstance(a, dict) and isinstance(b, dict):
d = diff_dicts(a, b, path=path, predicates=predicates, differs=differs)
d = diff_dicts(a, b, path=path, config=config)
elif isinstance(a, str) and isinstance(b, str):
# Don't pass differs/predicates as the only possible use case is to
# use a different character differ within each line or predicates
Expand All @@ -100,35 +94,42 @@
return d


def diff_sequence_multilevel(a, b, path="", predicates=None, differs=None):
def diff_string_lines(a, b, path="", config=None):
"""Diff two lists of strings (lines)"""

# This is mainly about short-circuiting to avoid full snakes for equal content
# since we know we can rely on __eq__ comparison
if len(a) == len(b) and a == b:
return []

return diff_strings_linewise(a, b)


def diff_sequence_multilevel(a, b, path="", config=None):
"""Compute diff of two lists with configurable behaviour."""

if predicates is None:
predicates = default_predicates()
if differs is None:
differs = default_differs()
if config is None:
config = DiffConfig()

Check warning on line 112 in nbdime/diffing/generic.py

View check run for this annotation

Codecov / codecov/patch

nbdime/diffing/generic.py#L112

Added line #L112 was not covered by tests

# Invoke multilevel snake computation algorithm
compares = predicates[path or '/']
compares = config.predicates[path or '/']
snakes = compute_snakes_multilevel(a, b, compares)

# Convert snakes to diff
return compute_diff_from_snakes(a, b, snakes, path=path, predicates=predicates, differs=differs)
return compute_diff_from_snakes(a, b, snakes, path=path, config=config)


def diff_lists(a, b, path="", predicates=None, differs=None, shallow_diff=None):
def diff_lists(a, b, path="", config=None, shallow_diff=None):
"""Compute diff of two lists with configurable behaviour."""

if predicates is None:
predicates = default_predicates()
if differs is None:
differs = default_differs()
if config is None:
config = DiffConfig()

Check warning on line 126 in nbdime/diffing/generic.py

View check run for this annotation

Codecov / codecov/patch

nbdime/diffing/generic.py#L126

Added line #L126 was not covered by tests

# If multiple compares are provided to this path, delegate to multilevel algorithm
compares = predicates[path or '/']
compares = config.predicates[path or '/']
if len(compares) > 1:
assert shallow_diff is None
return diff_sequence_multilevel(a, b, path=path, predicates=predicates, differs=differs)
return diff_sequence_multilevel(a, b, path=path, config=config)

# First make a shallow sequence diff with custom compare,
# unless it's provided for us
Expand All @@ -138,7 +139,7 @@
# Next we recurse to diff items in sequence that are considered
# similar by compares[0] in the loop below
subpath = "/".join((path, "*"))
diffit = differs[subpath]
diffit = config.differs[subpath]

# Count consumed items i,j from a,b, (i="take" in patch_list)
i, j = 0, 0
Expand All @@ -165,8 +166,8 @@
for k in range(n):
aval = a[i + k]
bval = b[j + k]
if not is_atomic(aval):
cd = diffit(aval, bval, path=subpath, predicates=predicates, differs=differs)
if not config.is_atomic(aval, subpath):
cd = diffit(aval, bval, path=subpath, config=config)
if cd:
di.patch(i + k, cd) # FIXME: Not covered in tests, create test situation

Expand All @@ -186,7 +187,7 @@
return di.validated()


def diff_dicts(a, b, path="", predicates=None, differs=None):
def diff_dicts(a, b, path="", config=None):
"""Compute diff of two dicts with configurable behaviour.

Keys in both a and b will be handled based on
Expand All @@ -197,10 +198,8 @@
Items not mentioned in diff are items where compare(x, y) return True.
For other items the diff will contain delete, insert, or replace entries.
"""
if predicates is None:
predicates = default_predicates()
if differs is None:
differs = default_differs()
if config is None:
config = DiffConfig()

Check warning on line 202 in nbdime/diffing/generic.py

View check run for this annotation

Codecov / codecov/patch

nbdime/diffing/generic.py#L202

Added line #L202 was not covered by tests

if not isinstance(a, dict) or not isinstance(b, dict):
raise TypeError('Arguments to diff_dicts need to be dicts, got %r and %r' % (a, b))
Expand All @@ -218,14 +217,14 @@
avalue = a[key]
bvalue = b[key]
# If types are the same and nonatomic, recurse
if type(avalue) is type(bvalue) and not is_atomic(avalue):
subpath = "/".join((path, key))
diffit = differs[subpath]
dd = diffit(avalue, bvalue, path=subpath, predicates=predicates, differs=differs)
subpath = "/".join((path, key))
if type(avalue) is type(bvalue) and not config.is_atomic(avalue, path=subpath):
diffit = config.differs[subpath]
dd = diffit(avalue, bvalue, path=subpath, config=config)
if dd:
di.patch(key, dd)
else:
if (path or '/') in predicates:
if (path or '/') in config.predicates:
# Could also this a warning, but I think it shouldn't be done
raise RuntimeError(
"Found predicate(s) for path {} pointing to dict entry.".format(
Expand Down
49 changes: 34 additions & 15 deletions nbdime/diffing/notebooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@
from ..diff_format import MappingDiffBuilder, DiffOp
from ..utils import defaultdict2

from .generic import (diff, diff_sequence_multilevel,
compare_strings_approximate)
from .config import DiffConfig
from .generic import (
diff, diff_sequence_multilevel, compare_strings_approximate,
diff_string_lines,
)

__all__ = ["diff_notebooks"]

Expand Down Expand Up @@ -323,6 +326,7 @@ def compare_cell_moderate(x, y):
This is used to align cells in the /cells list
in the second multilevel diff iteration.
"""

# Cell types must match
if x["cell_type"] != y["cell_type"]:
return False
Expand All @@ -349,6 +353,7 @@ def compare_cell_strict(x, y):
This is used to align cells in the /cells list
in the first multilevel diff iteration.
"""

# Cell types must match
if x["cell_type"] != y["cell_type"]:
return False
Expand All @@ -373,8 +378,13 @@ def compare_cell_strict(x, y):
return True


def diff_single_outputs(a, b, path="/cells/*/outputs/*",
predicates=None, differs=None):
def compare_cell_by_ids(x, y):
"""Compare cells x,y strictly using cell IDs"""
# Only consider equal if both have IDs and they match
return 'id' in x and 'id' in y and x['id'] == y['id']


def diff_single_outputs(a, b, path="/cells/*/outputs/*", config=None):
"""DiffOp a pair of output cells."""
assert path == "/cells/*/outputs/*", 'Invalid path for ouput: %r' % path
assert a.output_type == b.output_type, 'cannot diff outputs of different types'
Expand All @@ -396,20 +406,22 @@ def diff_single_outputs(a, b, path="/cells/*/outputs/*",
di.append(e)

# Only diff data:
dd = diff_mime_bundle(a.data, b.data, path=path+"/data")
dd = diff_mime_bundle(a.data, b.data, path=path+"/data", config=config)
if dd:
di.patch("data", dd)

return di.validated()
else:
return diff(a, b)
return diff(a, b, config=config)


def add_mime_diff(key, avalue, bvalue, diffbuilder):
# TODO: Handle output diffing with plugins?
# I.e. image diff, svg diff, json diff, etc.

mimetype = key.lower()
if isinstance(avalue, str) and isinstance(bvalue, str) and avalue == bvalue:
return
if any(mimetype.startswith(tm) for tm in _split_mimes):
dd = diff(avalue, bvalue)
if dd:
Expand All @@ -418,8 +430,7 @@ def add_mime_diff(key, avalue, bvalue, diffbuilder):
diffbuilder.replace(key, bvalue)


def diff_attachments(a, b, path="/cells/*/attachments",
predicates=None, differs=None):
def diff_attachments(a, b, path="/cells/*/attachments", config=None):
"""Diff a pair of attachment collections"""
assert path == "/cells/*/attachments", 'Invalid path for attachment: %r' % path

Expand Down Expand Up @@ -454,8 +465,7 @@ def diff_attachments(a, b, path="/cells/*/attachments",
return di.validated()


def diff_mime_bundle(a, b, path=None,
predicates=None, differs=None):
def diff_mime_bundle(a, b, path=None, config=None):
"""Diff a MIME bundle.

A MIME bundle has MIME types as keys, with values that are
Expand Down Expand Up @@ -517,6 +527,7 @@ def ignored_diff(*args, **kwargs):
compare_cell_approximate,
compare_cell_moderate,
compare_cell_strict,
compare_cell_by_ids,
],
# Predicates to compare output cells (within one cell) in order of low-to-high precedence
"/cells/*/outputs": [
Expand All @@ -529,12 +540,22 @@ def ignored_diff(*args, **kwargs):
notebook_differs = defaultdict2(lambda: diff, {
"/cells": diff_sequence_multilevel,
"/cells/*": diff,
"/cells/*/source": diff_string_lines,
"/cells/*/outputs": diff_sequence_multilevel,
"/cells/*/outputs/*": diff_single_outputs,
"/cells/*/attachments": diff_attachments,
})


notebook_config = DiffConfig(
predicates=notebook_predicates,
differs=notebook_differs,
atomic_paths={
"/cells/*/id": True
}
)


def reset_notebook_differ():
"""Reset the notebook_differs dictionary to default values."""
# As it is a defaultdict2, simply clear all set keys to reset:
Expand Down Expand Up @@ -587,14 +608,12 @@ def set_notebook_diff_targets(sources=True, outputs=True, attachments=True,
def diff_cells(a, b):
"This is currently just used by some tests."
path = "/cells"
return notebook_differs[path](
a, b, path=path, predicates=notebook_predicates, differs=notebook_differs)
return notebook_differs[path](a, b, path=path, config=notebook_config)


def diff_item_at_path(a, b, path):
"""Calculate the diff using the configured notebook differ for path."""
return notebook_differs[path](
a, b, path=path, predicates=notebook_predicates, differs=notebook_differs)
return notebook_differs[path](a, b, path=path, config=notebook_config)


def diff_notebooks(a, b):
Expand All @@ -604,4 +623,4 @@ def diff_notebooks(a, b):
"""
if not (isinstance(a, dict) and isinstance(b, dict)):
raise TypeError("Expected inputs to be dicts, got %r and %r" % (a, b))
return diff(a, b, path="", predicates=notebook_predicates, differs=notebook_differs)
return diff(a, b, path="", config=notebook_config)
17 changes: 11 additions & 6 deletions nbdime/diffing/sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import operator
from collections import defaultdict

from .config import DiffConfig
from .seq_difflib import diff_sequence_difflib
from .seq_bruteforce import diff_sequence_bruteforce
from .seq_myers import diff_sequence_myers
Expand Down Expand Up @@ -37,7 +38,7 @@ def diff_sequence(a, b, compare=operator.__eq__):
raise RuntimeError("Unknown diff_sequence_algorithm {}.".format(diff_sequence_algorithm))


def diff_strings_by_char(a, b, path="", predicates=None, differs=None):
def diff_strings_by_char(a, b, path="", config=None):
"Compute char-based diff of two strings."
assert isinstance(a, str) and isinstance(b, str), (
'Arguments need to be string types. Got %r and %r' % (a, b))
Expand All @@ -52,12 +53,16 @@ def diff_strings_linewise(a, b):
"""
assert isinstance(a, str) and isinstance(b, str), (
'Arguments need to be string types. Got %r and %r' % (a, b))
if a == b:
return []
lines_a = a.splitlines(True)
lines_b = b.splitlines(True)

from .generic import diff_lists, compare_strings_approximate
predicates = defaultdict(lambda: [
compare_strings_approximate,
operator.__eq__])
differs = defaultdict(lambda: diff_strings_by_char)
return diff_lists(lines_a, lines_b, predicates=predicates, differs=differs)
config = DiffConfig(
predicates=defaultdict(lambda: [
compare_strings_approximate,
operator.__eq__]),
differs=defaultdict(lambda: diff_strings_by_char)
)
return diff_lists(lines_a, lines_b, config=config)
Loading
Loading