diff --git a/go.mod b/go.mod index 7b394f3..e86098d 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,3 @@ module github.com/pointlander/peg -require github.com/pointlander/jetset v1.0.1-0.20190518214125-eee7eff80bd4 - go 1.13 diff --git a/go.sum b/go.sum index 66b426d..e69de29 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +0,0 @@ -github.com/pointlander/compress v1.1.0 h1:5fUcQV2qEHvk0OpILH6eltwluN5VnwiYrkc1wjGUHnU= -github.com/pointlander/compress v1.1.0/go.mod h1:q5NXNGzqj5uPnVuhGkZfmgHqNUhf15VLi6L9kW0VEc0= -github.com/pointlander/compress v1.1.1-0.20190518213731-ff44bd196cc3 h1:hUmXhbljNFtrH5hzV9kiRoddZ5nfPTq3K0Sb2hYYiqE= -github.com/pointlander/compress v1.1.1-0.20190518213731-ff44bd196cc3/go.mod h1:q5NXNGzqj5uPnVuhGkZfmgHqNUhf15VLi6L9kW0VEc0= -github.com/pointlander/jetset v1.0.0 h1:bNlaNAX7cDPID9SlcogmXlDWq0KcRJSpKwHXaAM3bGQ= -github.com/pointlander/jetset v1.0.0/go.mod h1:zY6+WHRPB10uzTajloHtybSicLW1bf6Rz0eSaU9Deng= -github.com/pointlander/jetset v1.0.1-0.20190518214125-eee7eff80bd4 h1:RHHRCZeaNyBXdYPMjZNH8/XHDBH38TZzw8izrW7dmBE= -github.com/pointlander/jetset v1.0.1-0.20190518214125-eee7eff80bd4/go.mod h1:RdR1j20Aj5pB6+fw6Y9Ur7lMHpegTEjY1vc19hEZL40= diff --git a/set/set.go b/set/set.go new file mode 100644 index 0000000..a9968e0 --- /dev/null +++ b/set/set.go @@ -0,0 +1,318 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
// Node is one element of the doubly linked list that backs a Set. A node
// linked between the two sentinels represents the closed interval
// [Begin, End].
type Node struct {
	Forward  *Node // next node towards Set.Tail; nil on the tail sentinel
	Backward *Node // previous node towards Set.Head; nil on the head sentinel
	Begin    rune  // first rune of the interval (inclusive)
	End      rune  // last rune of the interval (inclusive)
}

// Set is a set of runes stored as a sorted list of disjoint, non-adjacent
// closed intervals. Head and Tail are sentinels stored by value; an empty set
// leaves them unlinked (Head.Forward == nil). Interval nodes point back at the
// sentinels, so a populated Set must not be copied by value; use Copy.
type Set struct {
	Head Node
	Tail Node
}

// NewSet returns a new, empty set.
func NewSet() *Set {
	return &Set{
		Head: Node{
			// Sentinel value kept for compatibility with code that inspects
			// the exported field; no method in this block reads it.
			Begin: math.MaxInt32,
		},
	}
}

// isRange reports whether n is an interval node, as opposed to nil or the
// tail sentinel (the only linked node whose Forward is nil).
func isRange(n *Node) bool {
	return n != nil && n.Forward != nil
}

// appendRange links a new [begin, end] node after last, the current end of a
// list under construction, and returns the new node.
func appendRange(last *Node, begin, end rune) *Node {
	node := &Node{Backward: last, Begin: begin, End: end}
	last.Forward = node
	return node
}

// seal terminates a list built with appendRange by linking last to the tail
// sentinel. When nothing was appended the set is left in its unlinked, empty
// state.
func (s *Set) seal(last *Node) {
	if last == &s.Head {
		return
	}
	last.Forward = &s.Tail
	s.Tail.Backward = last
}

// String returns the members of the set as space separated decimal code
// points enclosed in brackets, e.g. "[97 98 99]". An empty set yields "[]".
func (s *Set) String() string {
	var b strings.Builder
	b.WriteByte('[')
	sep := ""
	for n := s.Head.Forward; isRange(n); n = n.Forward {
		// Count in int64 so a range ending at math.MaxInt32 cannot wrap the
		// loop variable and spin forever.
		for code := int64(n.Begin); code <= int64(n.End); code++ {
			b.WriteString(sep)
			b.WriteString(strconv.FormatInt(code, 10))
			sep = " "
		}
	}
	b.WriteByte(']')
	return b.String()
}

// Copy returns an independent copy of the set; the copy shares no nodes with
// the receiver.
func (s *Set) Copy() *Set {
	dup := NewSet()
	last := &dup.Head
	for n := s.Head.Forward; isRange(n); n = n.Forward {
		last = appendRange(last, n.Begin, n.End)
	}
	dup.seal(last)
	return dup
}

// Add adds a single rune to the set.
func (s *Set) Add(a rune) {
	s.AddRange(a, a)
}

// AddRange adds every rune in the closed interval [begin, end] to the set.
// Intervals that overlap or directly touch the new one are merged with it, so
// the list stays sorted, disjoint and non-adjacent. A range with begin > end
// is empty and leaves the set unchanged.
func (s *Set) AddRange(begin, end rune) {
	if begin > end {
		return
	}
	if s.Head.Forward == nil {
		// First insertion: link the sentinels so the scans below always
		// terminate on the tail.
		s.Head.Forward, s.Tail.Backward = &s.Tail, &s.Head
	}
	// Compare in int64 so the "+1" adjacency tests cannot overflow at
	// math.MaxInt32.
	lo, hi := int64(begin), int64(end)

	// Skip every interval that ends before the new one and does not touch it.
	prev := &s.Head
	for isRange(prev.Forward) && int64(prev.Forward.End)+1 < lo {
		prev = prev.Forward
	}

	first := prev.Forward
	if !isRange(first) || int64(first.Begin) > hi+1 {
		// The new range falls into a gap: insert a fresh node.
		node := &Node{Forward: first, Backward: prev, Begin: begin, End: end}
		prev.Forward, first.Backward = node, node
		return
	}

	// Otherwise grow first to cover the new range and absorb every following
	// interval that the grown range overlaps or touches.
	if begin < first.Begin {
		first.Begin = begin
	}
	last := first
	for isRange(last.Forward) && int64(last.Forward.Begin) <= hi+1 {
		last = last.Forward
	}
	if last.End > end {
		end = last.End
	}
	if end > first.End {
		first.End = end
	}
	first.Forward = last.Forward
	last.Forward.Backward = first
}

// Has reports whether the rune begin is a member of the set.
func (s *Set) Has(begin rune) bool {
	for n := s.Head.Forward; isRange(n); n = n.Forward {
		if begin <= n.End {
			// Intervals are sorted, so this is the only candidate.
			return begin >= n.Begin
		}
	}
	return false
}

// Complement returns the set of all runes in [0, endSymbol] that are not
// members of s. Members of s above endSymbol are ignored, and a negative
// endSymbol yields an empty set.
func (s *Set) Complement(endSymbol rune) *Set {
	out := NewSet()
	last := &out.Head
	// next is the smallest rune not yet known to be covered by s. int64 keeps
	// End+1 from overflowing when an interval ends at math.MaxInt32.
	next, limit := int64(0), int64(endSymbol)
	for n := s.Head.Forward; isRange(n) && next <= limit; n = n.Forward {
		if gapEnd := int64(n.Begin) - 1; gapEnd >= next {
			if gapEnd > limit {
				gapEnd = limit
			}
			last = appendRange(last, rune(next), rune(gapEnd))
		}
		if after := int64(n.End) + 1; after > next {
			next = after
		}
	}
	if next <= limit {
		last = appendRange(last, rune(next), rune(limit))
	}
	out.seal(last)
	return out
}

// Union returns a new set holding every rune that is in s or in a. Neither
// operand is modified.
func (s *Set) Union(a *Set) *Set {
	out := s.Copy()
	for n := a.Head.Forward; isRange(n); n = n.Forward {
		out.AddRange(n.Begin, n.End)
	}
	return out
}

// Intersects reports whether s and b have at least one rune in common.
func (s *Set) Intersects(b *Set) bool {
	// Both lists are sorted, so a single merge-style walk finds any overlap.
	x, y := s.Head.Forward, b.Head.Forward
	for isRange(x) && isRange(y) {
		switch {
		case x.End < y.Begin:
			x = x.Forward
		case y.End < x.Begin:
			y = y.Forward
		default:
			return true
		}
	}
	return false
}

// Equal reports whether s and a contain exactly the same runes.
func (s *Set) Equal(a *Set) bool {
	// AddRange keeps intervals merged, so equal sets have identical lists.
	x, y := s.Head.Forward, a.Head.Forward
	for isRange(x) && isRange(y) {
		if x.Begin != y.Begin || x.End != y.End {
			return false
		}
		x, y = x.Forward, y.Forward
	}
	// Equal only if both lists ran out together.
	return !isRange(x) && !isRange(y)
}
of the set +func (s *Set) Len() int { + size := 0 + if s.Head.Forward == nil { + return size + } + beginNode := s.Head.Forward + for beginNode.Forward != nil { + size += int(beginNode.End) - int(beginNode.Begin) + 1 + beginNode = beginNode.Forward + } + return size +} diff --git a/set/set_test.go b/set/set_test.go new file mode 100644 index 0000000..be75569 --- /dev/null +++ b/set/set_test.go @@ -0,0 +1,206 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package set + +import ( + "math" + "testing" +) + +func TestString(t *testing.T) { + s := NewSet() + s.AddRange('a', 'c') + s.AddRange('c', 'e') + + if s.String() != "[97 98 99 100 101]" { + t.Fatal("string is broken") + } +} + +func TestCopy(t *testing.T) { + s := NewSet() + s.AddRange('a', 'c') + s.AddRange('c', 'e') + + cp := s.Copy() + if !cp.Equal(s) { + t.Fatal("cp should be a copy of s") + } +} + +func TestAdd(t *testing.T) { + s := NewSet() + s.Add('a') + + if s.Len() != 1 { + t.Fatal("length should be 1", s.Len()) + } + + if !s.Has('a') { + t.Fatal("set should have a") + } + + s.Add('c') + s.Add('e') + s.Add('A') +} + +func TestAddRange(t *testing.T) { + s := NewSet() + s.AddRange('a', 'c') + s.AddRange('c', 'e') + if s.Len() != 5 { + t.Fatal("size should be 5") + } + if !s.Has('b') { + t.Fatal("set should have b") + } + if !s.Has('d') { + t.Fatal("set should have d") + } + + s.AddRange('g', 'i') + if s.Len() != 8 { + t.Log(s.Len()) + t.Fatal("size should be 7") + } + if !s.Has('h') { + t.Fatal("set should have h") + } + + s.AddRange('A', 'C') + if s.Len() != 11 { + t.Log(s.Len()) + t.Fatal("size should be 10") + } + if !s.Has('B') { + t.Fatal("set should have B") + } + + s.AddRange('A', 'z') + if s.Len() != 'z'-'A'+1 { + t.Log(s.Len()) + t.Fatalf("size should be %d", 'z'-'A'+1) + } + if !s.Has('B') { + t.Fatal("set should have B") + } +} + +func TestHas(t *testing.T) { + r := NewSet() + 
r.AddRange('a', 'c') + + if !r.Has('b') { + t.Fatal("set should have b") + } + + if r.Has('d') { + t.Fatal("set should not have d") + } +} + +func TestComplement(t *testing.T) { + s := NewSet() + s.AddRange('a', 'c') + s.AddRange('c', 'e') + s.AddRange('g', 'i') + s.AddRange('A', 'C') + c1 := s.Complement(rune(math.MaxInt32)) + c2 := c1.Complement(rune(math.MaxInt32)) + if !s.Equal(c2) { + t.Fatal("sets should be equal") + } +} + +func TestUnion(t *testing.T) { + r := NewSet() + r.AddRange('a', 'c') + r.AddRange('c', 'e') + + s := NewSet() + s.AddRange('a', 'c') + s.AddRange('c', 'e') + s.AddRange('g', 'i') + s.AddRange('A', 'C') + + z := NewSet() + z.AddRange('g', 'i') + z.AddRange('A', 'C') + + z = r.Union(z) + + if !z.Equal(s) { + t.Fatal("sets should be equal") + } +} + +func TestIntersects(t *testing.T) { + r := NewSet() + r.AddRange('a', 'c') + + s := NewSet() + s.AddRange('a', 'c') + s.AddRange('c', 'e') + s.AddRange('g', 'i') + s.AddRange('A', 'C') + + if !r.Intersects(s) { + t.Fatal("sets should intersect") + } + + z := NewSet() + z.Add('z') + + if z.Intersects(s) { + t.Fatal("sets should not intersect") + } +} + +func TestEqual(t *testing.T) { + r := NewSet() + r.AddRange('a', 'c') + r.AddRange('c', 'e') + r.AddRange('g', 'i') + + s := NewSet() + s.AddRange('a', 'c') + s.AddRange('c', 'e') + s.AddRange('g', 'i') + s.AddRange('A', 'C') + + if r.Equal(s) { + t.Fatal("sets should not be equal") + } + + r.AddRange('A', 'C') + + if !r.Equal(s) { + t.Fatal("sets should be equal") + } +} + +func TestLen(t *testing.T) { + r := NewSet() + r.AddRange('a', 'c') + r.AddRange('c', 'e') + r.AddRange('g', 'i') + + s := NewSet() + s.AddRange('a', 'c') + s.AddRange('c', 'e') + s.AddRange('g', 'i') + s.AddRange('A', 'C') + + if r.Len() == s.Len() { + t.Fatal("sets should not be equal in length") + } + + r.AddRange('A', 'C') + + if r.Len() != s.Len() { + t.Fatal("sets should be equal in length") + } +} diff --git a/tree/peg.go b/tree/peg.go index fdf4804..b40a873 100644 --- 
a/tree/peg.go +++ b/tree/peg.go @@ -18,7 +18,7 @@ import ( "strings" "text/template" - "github.com/pointlander/jetset" + "github.com/pointlander/peg/set" ) const pegHeaderTemplate = `// Code generated by {{.Generator}}. DO NOT EDIT. @@ -1042,12 +1042,16 @@ func (t *Tree) Compile(file string, args []string, out io.Writer) (err error) { }}) if t._switch { - var optimizeAlternates func(node Node) (consumes bool, s jetset.Set) + var optimizeAlternates func(node Node) (consumes bool, s *set.Set) cache, firstPass := make([]struct { reached, consumes bool - s jetset.Set + s *set.Set }, t.RulesCount), true - optimizeAlternates = func(n Node) (consumes bool, s jetset.Set) { + for i := range cache { + cache[i].s = set.NewSet() + } + optimizeAlternates = func(n Node) (consumes bool, s *set.Set) { + s = set.NewSet() /*n.debug()*/ switch n.GetType() { case TypeRule: @@ -1065,25 +1069,28 @@ func (t *Tree) Compile(file string, args []string, out io.Writer) (err error) { case TypeDot: consumes = true /* TypeDot set doesn't include the EndSymbol */ - s = s.Add(uint64(t.EndSymbol)) - s = s.Complement(uint64(t.EndSymbol)) + // s.Add(t.EndSymbol) + s = s.Complement(t.EndSymbol - 1) case TypeString, TypeCharacter: consumes = true - s = s.Add(uint64([]rune(n.String())[0])) + s.Add([]rune(n.String())[0]) case TypeRange: consumes = true element := n.Front() lower := []rune(element.String())[0] element = element.Next() upper := []rune(element.String())[0] - s = s.AddRange(uint64(lower), uint64(upper)) + s.AddRange(lower, upper) case TypeAlternate: consumes = true mconsumes, properties, c := consumes, make([]struct { intersects bool - s jetset.Set + s *set.Set }, n.Len()), 0 + for i := range properties { + properties[i].s = set.NewSet() + } for _, element := range n.Slice() { mconsumes, properties[c].s = optimizeAlternates(element) consumes = consumes && mconsumes @@ -1118,7 +1125,7 @@ func (t *Tree) Compile(file string, args []string, out io.Writer) (err error) { } else { class := 
&node{Type: TypeUnorderedAlternate} for d := 0; d < 256; d++ { - if properties[c].s.Has(uint64(d)) { + if properties[c].s.Has(rune(d)) { class.PushBack(&node{Type: TypeCharacter, string: string(rune(d))}) } } @@ -1158,9 +1165,11 @@ func (t *Tree) Compile(file string, args []string, out io.Writer) (err error) { case TypeSequence: classes, elements := make([]struct { - s jetset.Set + s *set.Set }, n.Len()), n.Slice() - + for i := range classes { + classes[i].s = set.NewSet() + } for c, element := range elements { consumes, classes[c].s = optimizeAlternates(element) if consumes {