Skip to content

Commit

Permalink
Introduce deliberate instability to difference output (#214)
Browse files Browse the repository at this point in the history
The reporter output is documented as unstable.
The API for custom reporters also specifies that the diffing of
slices is unstable.

Introduce deliberate instability to the diffing algorithm so that
we have the flexibility to improve it in the future.
The current algorithm optimizes for speed, rather than optimality,
so there is much room for improvement.
  • Loading branch information
dsnet authored Jun 10, 2020
1 parent 0cd6169 commit 7c9a834
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
22 changes: 21 additions & 1 deletion cmp/internal/diff/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
// is more important than obtaining a minimal Levenshtein distance.
package diff

import (
"math/rand"
"time"

"github.com/google/go-cmp/cmp/internal/flags"
)

// EditType represents a single operation within an edit-script.
type EditType uint8

Expand Down Expand Up @@ -112,6 +119,8 @@ func (r Result) Similar() bool {
return r.NumSame+1 >= r.NumDiff
}

var randInt = rand.New(rand.NewSource(time.Now().Unix())).Intn(2)

// Difference reports whether two lists of lengths nx and ny are equal
// given the definition of equality provided as f.
//
Expand Down Expand Up @@ -159,6 +168,17 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
// A vertical edge is equivalent to inserting a symbol from list Y.
// A diagonal edge is equivalent to a matching symbol between both X and Y.

// To ensure flexibility in changing the algorithm in the future,
// introduce some degree of deliberate instability.
// This is achieved by fiddling the zigzag iterator to start searching
// the graph starting from the bottom-right versus than the top-left.
// The result may differ depending on the starting search location,
// but still produces a valid edit script.
zigzagInit := randInt // either 0 or 1
if flags.Deterministic {
zigzagInit = 0
}

// Invariants:
// • 0 ≤ fwdPath.X ≤ (fwdFrontier.X, revFrontier.X) ≤ revPath.X ≤ nx
// • 0 ≤ fwdPath.Y ≤ (fwdFrontier.Y, revFrontier.Y) ≤ revPath.Y ≤ ny
Expand Down Expand Up @@ -209,7 +229,7 @@ func Difference(nx, ny int, f EqualFunc) (es EditScript) {
if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
break
}
for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
for stop1, stop2, i := false, false, zigzagInit; !(stop1 && stop2) && searchBudget > 0; i++ {
// Search in a diagonal pattern for a match.
z := zigzag(i)
p := point{fwdFrontier.X + z, fwdFrontier.Y - z}
Expand Down
6 changes: 6 additions & 0 deletions cmp/internal/diff/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,14 @@ import (
"strings"
"testing"
"unicode"

"github.com/google/go-cmp/cmp/internal/flags"
)

func init() {
flags.Deterministic = true
}

func TestDifference(t *testing.T) {
tests := []struct {
// Before passing x and y to Difference, we strip all spaces so that
Expand Down

0 comments on commit 7c9a834

Please sign in to comment.