-
Notifications
You must be signed in to change notification settings - Fork 397
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Add the stdlib_diff tool to compare gno and go standard libraries #1425
Changes from all commits
c9534c7
a7ac60e
e72346d
02c0d0b
7a15992
1c6b428
158b41d
22f286c
640f056
6824c34
5989e22
8b350ca
6d9af8f
7c7a7ab
ff6f98f
019016f
95df663
5ffff78
b6b3ad4
8895e1c
d125294
f420a8c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Stdlibs_diff | ||
|
||
Stdlibs_diff is a tool that generates an HTML report indicating the differences between the gno standard libraries and the go standard libraries | ||
|
||
## Usage | ||
|
||
Compare the `go` standard libraries to the `gno` standard libraries | ||
|
||
```shell | ||
./stdlibs_diff --src <path to go standard libraries> --dst <path to gno standard libraries> --out <output directory> | ||
``` | ||
|
||
Compare the `gno` standard libraries to the `go` standard libraries | ||
|
||
```shell | ||
./stdlibs_diff --src <path to gno standard libraries> --dst <path to go standard libraries> --out <output directory> --src_is_gno | ||
``` | ||
|
||
|
||
## Parameters | ||
|
||
| Flag | Description | Default value | | ||
| ---------- | ------------------------------------------------------------------ | ------------- | | ||
| src | Directory containing packages that will be compared to destination | None | | ||
| dst | Directory containing packages; used to compare src packages | None | | ||
| out | Directory where the report will be created | None | | ||
| src_is_gno | Indicates if the src parameter is the gno standard library | false | | ||
|
||
## Tips | ||
|
||
An index.html is generated at the root of the report location. Utilize it to navigate easily through the report. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
package main | ||
|
||
type Algorithm interface { | ||
Diff() (srcDiff []LineDifferrence, dstDiff []LineDifferrence) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package main | ||
|
||
// diffStatus enumerates the possible outcomes of comparing a file between
// the source and the destination.
type diffStatus uint

const (
	// missingInSrc is rendered as "missing in src".
	missingInSrc diffStatus = iota
	// missingInDst is rendered as "missing in dst".
	missingInDst
	// hasDiff is rendered as "files differ".
	hasDiff
	// noDiff is rendered as "files are equal".
	noDiff
)

// String returns the human-readable label of the status, or "Unknown" when
// the value is not one of the declared constants.
func (status diffStatus) String() string {
	labels := [...]string{
		missingInSrc: "missing in src",
		missingInDst: "missing in dst",
		hasDiff:      "files differ",
		noDiff:       "files are equal",
	}

	// noDiff is the last declared constant; anything above it has no label.
	if status > noDiff {
		return "Unknown"
	}
	return labels[status]
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
"strings" | ||
) | ||
|
||
// FileDiff is a struct for comparing differences between two files. | ||
type FileDiff struct { | ||
Src []string // Lines of the source file. | ||
Dst []string // Lines of the destination file. | ||
Algorithm // Algorithm used for comparison. | ||
} | ||
|
||
// LineDifferrence represents a difference in a line during file comparison. | ||
type LineDifferrence struct { | ||
Line string // The line content. | ||
Operation operation // The operation performed on the line (e.g., "add", "delete", "equal"). | ||
} | ||
|
||
// NewFileDiff creates a new FileDiff instance for comparing differences between | ||
// the specified source and destination files. It initializes the source and | ||
// destination file lines . | ||
func NewFileDiff(srcPath, dstPath string) (*FileDiff, error) { | ||
src, err := getFileLines(srcPath) | ||
if err != nil { | ||
return nil, fmt.Errorf("can't read src file: %w", err) | ||
} | ||
|
||
dst, err := getFileLines(dstPath) | ||
if err != nil { | ||
return nil, fmt.Errorf("can't read dst file: %w", err) | ||
} | ||
|
||
return &FileDiff{ | ||
Src: src, | ||
Dst: dst, | ||
Algorithm: NewMyers(src, dst), | ||
}, nil | ||
} | ||
|
||
// Differences returns the differences in lines between the source and | ||
// destination files using the configured diff algorithm. | ||
func (f *FileDiff) Differences() (src, dst []LineDifferrence) { | ||
return f.Diff() | ||
} | ||
|
||
// getFileLines reads the file at path p and returns its content split into
// lines. Windows line endings ("\r\n") are normalized to "\n" before
// splitting.
//
// A file that does not exist yields (nil, nil); any other read failure is
// returned as is.
func getFileLines(p string) ([]string, error) {
	content, err := os.ReadFile(p)
	switch {
	case err == nil:
		normalized := strings.ReplaceAll(string(content), "\r\n", "\n")
		return strings.Split(normalized, "\n"), nil
	case os.IsNotExist(err):
		// A missing file is deliberately not treated as a failure.
		return nil, nil
	default:
		return nil, err
	}
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package main | ||
|
||
import ( | ||
"flag" | ||
"log" | ||
) | ||
|
||
// main parses the command-line flags (src, dst, out and src_is_gno), creates
// a report builder from them and builds the report. Any failure terminates
// the program through log.Fatal / log.Fatalln.
func main() { | ||
var srcPath string | ||
var dstPath string | ||
var outDirectory string | ||
var srcIsGno bool | ||
|
||
flag.StringVar(&srcPath, "src", "", "Directory containing packages that will be compared to destination") | ||
flag.StringVar(&dstPath, "dst", "", "Directory containing packages; used to compare src packages") | ||
flag.StringVar(&outDirectory, "out", "", "Directory where the report will be created") | ||
flag.BoolVar(&srcIsGno, "src_is_gno", false, "If true, indicates that the src parameter corresponds to the gno standard libraries") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd avoid this in favour of just matching go and gno files ignoring their extensions There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @thehowl I need a clarification on this. Do you want to compare all files ? I mean even files that are not .go or .gno ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I mean the following:
Ie. this is an example of which files should match:
|
||
flag.Parse() | ||
|
||
reportBuilder, err := NewReportBuilder(srcPath, dstPath, outDirectory, srcIsGno) | ||
if err != nil { | ||
log.Fatal("can't build report builder: ", err.Error()) | ||
} | ||
|
||
log.Println("Building report...") | ||
if err := reportBuilder.Build(); err != nil { | ||
log.Fatalln("can't build report: ", err.Error()) | ||
} | ||
log.Println("Report generation done!") | ||
} |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was kind of a NIH moment, considering a good library already exists :) I don't have too much trouble keeping this file if you want, though, since this is just in an external tool and the code is not that large, but try to use good external dependencies for common problems if they exist, instead of just reimplementing the algorithm from wikipedia. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I missed this library ! I will integrate it in the tool. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've checked and tested the library (v1.3.1), it seems that the current version is giving wrong results. I've tested with an older version (v.1.1.0) and it seems to work but I'm not convinced about the stability. I think that it is better to keep the actual myers implementation for now and if when the lib is fixed we switch to it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Can you specify what you mean? My concern is about performance. It doesn't need to be spectacular but I'd want the tool to be able to make a full, recursive analysis of the stdlibs directories in < 2min on the machines we test on. If you're able to do it with your code, it doesn't matter and I'm fine either way |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
package main | ||
|
||
import ( | ||
"slices" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. seeing as you are importing slices, please add a |
||
) | ||
|
||
var _ Algorithm = (*Myers)(nil) | ||
|
||
// Myers computes a line-based difference between two files with the Myers
// algorithm.
type Myers struct {
	// src holds the lines of the source file.
	src []string
	// dst holds the lines of the destination file.
	dst []string
}

// NewMyers returns a Myers instance that compares the src lines with the dst
// lines. The slices are stored as given; they are not copied.
func NewMyers(src, dst []string) *Myers {
	myers := new(Myers)
	myers.src = src
	myers.dst = dst
	return myers
}
|
||
// Do performs the Myers algorithm to find the differences between source and destination files. | ||
// It returns the differences as two slices of LineDifferrence representing source and destination changes. | ||
func (m *Myers) Diff() ([]LineDifferrence, []LineDifferrence) { | ||
var ( | ||
srcIndex, dstIndex int | ||
insertCount, deleteCount int | ||
dstDiff, srcDiff []LineDifferrence | ||
) | ||
|
||
operations := m.doMyers() | ||
|
||
for _, op := range operations { | ||
switch op { | ||
case insert: | ||
dstDiff = append(dstDiff, LineDifferrence{Line: m.dst[dstIndex], Operation: op}) | ||
srcDiff = append(srcDiff, LineDifferrence{Line: "", Operation: equal}) | ||
dstIndex++ | ||
insertCount++ | ||
continue | ||
|
||
case equal: | ||
dstDiff = append(dstDiff, LineDifferrence{Line: m.src[srcIndex], Operation: op}) | ||
srcDiff = append(srcDiff, LineDifferrence{Line: m.src[srcIndex], Operation: op}) | ||
srcIndex++ | ||
dstIndex++ | ||
continue | ||
|
||
case delete: | ||
dstDiff = append(dstDiff, LineDifferrence{Line: "", Operation: equal}) | ||
srcDiff = append(srcDiff, LineDifferrence{Line: m.src[srcIndex], Operation: op}) | ||
srcIndex++ | ||
deleteCount++ | ||
continue | ||
} | ||
} | ||
|
||
// Means that src file is empty. | ||
if insertCount == len(srcDiff) { | ||
srcDiff = make([]LineDifferrence, 0) | ||
} | ||
// Means that dst file is empty. | ||
if deleteCount == len(dstDiff) { | ||
dstDiff = make([]LineDifferrence, 0) | ||
} | ||
return srcDiff, dstDiff | ||
} | ||
|
||
// doMyers performs the Myers algorithm and returns the list of operations. | ||
func (m *Myers) doMyers() []operation { | ||
var tree []map[int]int | ||
var x, y int | ||
|
||
srcLen := len(m.src) | ||
dstLen := len(m.dst) | ||
max := srcLen + dstLen | ||
|
||
for pathLen := 0; pathLen <= max; pathLen++ { | ||
optimalCoordinates := make(map[int]int, pathLen+2) | ||
tree = append(tree, optimalCoordinates) | ||
|
||
if pathLen == 0 { | ||
commonPrefixLen := 0 | ||
for srcLen > commonPrefixLen && dstLen > commonPrefixLen && m.src[commonPrefixLen] == m.dst[commonPrefixLen] { | ||
commonPrefixLen++ | ||
} | ||
optimalCoordinates[0] = commonPrefixLen | ||
|
||
if commonPrefixLen == srcLen && commonPrefixLen == dstLen { | ||
return m.getAllOperations(tree) | ||
} | ||
continue | ||
} | ||
|
||
lastV := tree[pathLen-1] | ||
|
||
for k := -pathLen; k <= pathLen; k += 2 { | ||
if k == -pathLen || (k != pathLen && lastV[k-1] < lastV[k+1]) { | ||
x = lastV[k+1] | ||
} else { | ||
x = lastV[k-1] + 1 | ||
} | ||
|
||
y = x - k | ||
|
||
for x < srcLen && y < dstLen && m.src[x] == m.dst[y] { | ||
x, y = x+1, y+1 | ||
} | ||
|
||
optimalCoordinates[k] = x | ||
|
||
if x == srcLen && y == dstLen { | ||
return m.getAllOperations(tree) | ||
} | ||
} | ||
} | ||
|
||
return m.getAllOperations(tree) | ||
} | ||
|
||
// getAllOperations retrieves the list of operations from the calculated tree. | ||
func (m *Myers) getAllOperations(tree []map[int]int) []operation { | ||
var operations []operation | ||
var k, prevK, prevX, prevY int | ||
|
||
x := len(m.src) | ||
y := len(m.dst) | ||
|
||
for pathLen := len(tree) - 1; pathLen > 0; pathLen-- { | ||
k = x - y | ||
lastV := tree[pathLen-1] | ||
|
||
if k == -pathLen || (k != pathLen && lastV[k-1] < lastV[k+1]) { | ||
prevK = k + 1 | ||
} else { | ||
prevK = k - 1 | ||
} | ||
|
||
prevX = lastV[prevK] | ||
prevY = prevX - prevK | ||
|
||
for x > prevX && y > prevY { | ||
operations = append(operations, equal) | ||
x -= 1 | ||
y -= 1 | ||
} | ||
|
||
if x == prevX { | ||
operations = append(operations, insert) | ||
} else { | ||
operations = append(operations, delete) | ||
} | ||
|
||
x, y = prevX, prevY | ||
} | ||
|
||
if tree[0][0] != 0 { | ||
for i := 0; i < tree[0][0]; i++ { | ||
operations = append(operations, equal) | ||
} | ||
} | ||
|
||
slices.Reverse(operations) | ||
return operations | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
package main | ||
|
||
// operation enumerates the kinds of operations a diff algorithm uses to
// indicate differences between files.
type operation uint

const (
	// insert represents an insertion operation.
	insert operation = iota + 1
	// delete represents a deletion operation. Inside this package the name
	// shadows the built-in delete function.
	delete
	// equal represents an equal operation.
	equal
)

// String returns the short label of the operation ("INS", "DEL" or "EQ"), or
// "UNKNOWN" for any value that is not a declared operation.
func (op operation) String() string {
	// Index 0 is unused because the constants start at 1.
	labels := [...]string{
		insert: "INS",
		delete: "DEL",
		equal:  "EQ",
	}

	if op < insert || op > equal {
		return "UNKNOWN"
	}
	return labels[op]
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
pretty sure with package flag it's a single dash, ie.
-src
not--src