Skip to content

Commit

Permalink
Perf: improvements to terms and built-in functions (open-policy-agent…
Browse files Browse the repository at this point in the history
…#7284)

Having worked on performance improvements in OPA on the side for almost
a month now, there's a lot of code piling up 😅 So much that a single PR
would be way too much to review. Instead, I'm splitting the work into
chunks, and will submit the next PR as soon as this one is merged. Using
the same benchmark as before — Regal linting itself, these new changes
in total reduce the number of allocations by ~13 million, and quite a
substantial amount of evaluation time saved as well.

This first PR is isolated to improvements to terms, values and
built-ins, and saves ~3M allocations. The details can be found below for
each change, and of course in the code :)

**BenchmarkRegalLintingItself-10 Before**
```
1885978209 ns/op    3497157312 B/op    69064779 allocs/op
```
**BenchmarkRegalLintingItself-10 After**
```
1796255084 ns/op    3452379408 B/op    66126623 allocs/op
```

**Terms**
- Use pointer receivers consistently for object and set types. This allows
  changing the sortGuard once lock from a pointer to a non-pointer type, which
  is really the biggest win performance-wise in this PR.
- Comparisons happen all the time, so make sure these take the shortest path
  possible whenever, possible, such as when one type is compared to another
  value of the same type.

Built-in functions:

**Arrays**
- Both `array.concat` and `array.slice` will now return the operand on operations
  where the result isn't different from the input operand (like when concatenating
  an empty array) instead of allocating a new term/value.

**Strings**
- Return operand on unchanged result rather than allocating new term/value.
- Where applicable, have functions take a cheaper path when string is ASCII
  and we can avoid the cost of rune conversion.

**Crypto**
- Hashing functions now optimized, spending less than half the time compared to
  previously.

**Objects**
- Avoid heap allocating result boolean escaping its scope, and instead use the
  return value of the `Until` function.

**HTTP**
- Use interned terms for keys in configuration object, avoding allocating these
  each time `http.send` is invoked.

**Globs**
- Use read/write lock to avoid contention. Use package level vars for "constant"
  values, avoiding them to escape to the heap each invocation.

**Not directly/only related to built-in functions**
- Add `ValueName` function replacing the previous `TypeName` functions for
  getting the name of Value's without paying for `any` interface allocations.
- Add a few more interned terms.

Signed-off-by: Anders Eknert <anders@styra.com>
  • Loading branch information
anderseknert authored Jan 20, 2025
1 parent 304768b commit 75962f5
Show file tree
Hide file tree
Showing 29 changed files with 634 additions and 304 deletions.
2 changes: 1 addition & 1 deletion internal/edittree/edittree_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -885,7 +885,7 @@ func parsePath(path *ast.Term) (ast.Ref, error) {
pathSegments = append(pathSegments, term)
})
default:
return nil, builtins.NewOperandErr(2, "must be one of {set, array} containing string paths or array of path segments but got %v", ast.TypeName(p))
return nil, builtins.NewOperandErr(2, "must be one of {set, array} containing string paths or array of path segments but got %v", ast.ValueName(p))
}

return pathSegments, nil
Expand Down
2 changes: 1 addition & 1 deletion internal/planner/planner.go
Original file line number Diff line number Diff line change
Expand Up @@ -1519,7 +1519,7 @@ func (p *Planner) planValue(t ast.Value, loc *ast.Location, iter planiter) error
p.loc = loc
return p.planObjectComprehension(v, iter)
default:
return fmt.Errorf("%v term not implemented", ast.TypeName(v))
return fmt.Errorf("%v term not implemented", ast.ValueName(v))
}
}

Expand Down
66 changes: 55 additions & 11 deletions v1/ast/compare.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,14 +151,7 @@ func Compare(a, b interface{}) int {
}
return 1
case Var:
b := b.(Var)
if a.Equal(b) {
return 0
}
if a < b {
return -1
}
return 1
return VarCompare(a, b.(Var))
case Ref:
b := b.(Ref)
return termSliceCompare(a, b)
Expand All @@ -181,7 +174,7 @@ func Compare(a, b interface{}) int {
if cmp := Compare(a.Term, b.Term); cmp != 0 {
return cmp
}
return Compare(a.Body, b.Body)
return a.Body.Compare(b.Body)
case *ObjectComprehension:
b := b.(*ObjectComprehension)
if cmp := Compare(a.Key, b.Key); cmp != 0 {
Expand All @@ -190,13 +183,13 @@ func Compare(a, b interface{}) int {
if cmp := Compare(a.Value, b.Value); cmp != 0 {
return cmp
}
return Compare(a.Body, b.Body)
return a.Body.Compare(b.Body)
case *SetComprehension:
b := b.(*SetComprehension)
if cmp := Compare(a.Term, b.Term); cmp != 0 {
return cmp
}
return Compare(a.Body, b.Body)
return a.Body.Compare(b.Body)
case Call:
b := b.(Call)
return termSliceCompare(a, b)
Expand Down Expand Up @@ -394,3 +387,54 @@ func withSliceCompare(a, b []*With) int {
}
return 0
}

func VarCompare(a, b Var) int {
if a == b {
return 0
}
if a < b {
return -1
}
return 1
}

func TermValueCompare(a, b *Term) int {
return a.Value.Compare(b.Value)
}

func ValueEqual(a, b Value) bool {
// TODO(ae): why doesn't this work the same?
//
// case interface{ Equal(Value) bool }:
// return v.Equal(b)
//
// When put on top, golangci-lint even flags the other cases as unreachable..
// but TestTopdownVirtualCache will have failing test cases when we replace
// the other cases with the above one.. 🤔
switch v := a.(type) {
case Null:
return v.Equal(b)
case Boolean:
return v.Equal(b)
case Number:
return v.Equal(b)
case String:
return v.Equal(b)
case Var:
return v.Equal(b)
case Ref:
return v.Equal(b)
case *Array:
return v.Equal(b)
}

return a.Compare(b) == 0
}

func RefCompare(a, b Ref) int {
return termSliceCompare(a, b)
}

func RefEqual(a, b Ref) bool {
return termSliceEqual(a, b)
}
2 changes: 1 addition & 1 deletion v1/ast/compile.go
Original file line number Diff line number Diff line change
Expand Up @@ -5500,7 +5500,7 @@ func rewriteDeclaredAssignment(g *localVarGenerator, stack *localDeclaredVars, e
return true
}
}
errs = append(errs, NewError(CompileErr, t.Location, "cannot assign to %v", TypeName(t.Value)))
errs = append(errs, NewError(CompileErr, t.Location, "cannot assign to %v", ValueName(t.Value)))
return true
}

Expand Down
6 changes: 6 additions & 0 deletions v1/ast/interning.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ var (

// since this is by far the most common negative number
minusOneTerm = &Term{Value: Number("-1")}

InternedNullTerm = &Term{Value: Null{}}
)

// InternedBooleanTerm returns an interned term with the given boolean value.
Expand Down Expand Up @@ -1090,3 +1092,7 @@ var intNumberTerms = [...]*Term{
{Value: Number("511")},
{Value: Number("512")},
}

var InternedEmptyString = StringTerm("")

var InternedEmptyObject = ObjectTerm()
2 changes: 1 addition & 1 deletion v1/ast/map.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func (vs *ValueMap) MarshalJSON() ([]byte, error) {
vs.Iter(func(k Value, v Value) bool {
tmp = append(tmp, map[string]interface{}{
"name": k.String(),
"type": TypeName(v),
"type": ValueName(v),
"value": v,
})
return false
Expand Down
10 changes: 5 additions & 5 deletions v1/ast/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,7 @@ func (p *Parser) parsePackage() *Package {
pkg.Path[0] = DefaultRootDocument.Copy().SetLocation(v[0].Location)
first, ok := v[0].Value.(Var)
if !ok {
p.errorf(v[0].Location, "unexpected %v token: expecting var", TypeName(v[0].Value))
p.errorf(v[0].Location, "unexpected %v token: expecting var", ValueName(v[0].Value))
return nil
}
pkg.Path[1] = StringTerm(string(first)).SetLocation(v[0].Location)
Expand All @@ -600,7 +600,7 @@ func (p *Parser) parsePackage() *Package {
case String:
pkg.Path[i] = v[i-1]
default:
p.errorf(v[i-1].Location, "unexpected %v token: expecting string", TypeName(v[i-1].Value))
p.errorf(v[i-1].Location, "unexpected %v token: expecting string", ValueName(v[i-1].Value))
return nil
}
}
Expand Down Expand Up @@ -643,7 +643,7 @@ func (p *Parser) parseImport() *Import {
case Ref:
for i := 1; i < len(v); i++ {
if _, ok := v[i].Value.(String); !ok {
p.errorf(v[i].Location, "unexpected %v token: expecting string", TypeName(v[i].Value))
p.errorf(v[i].Location, "unexpected %v token: expecting string", ValueName(v[i].Value))
return nil
}
}
Expand Down Expand Up @@ -1717,7 +1717,7 @@ func (p *Parser) parseRef(head *Term, offset int) (term *Term) {
case Var, *Array, Object, Set, *ArrayComprehension, *ObjectComprehension, *SetComprehension, Call:
// ok
default:
p.errorf(loc, "illegal ref (head cannot be %v)", TypeName(h))
p.errorf(loc, "illegal ref (head cannot be %v)", ValueName(h))
}

ref := []*Term{head}
Expand Down Expand Up @@ -2318,7 +2318,7 @@ func (p *Parser) validateDefaultRuleArgs(rule *Rule) bool {
switch v := x.Value.(type) {
case Var: // do nothing
default:
p.error(rule.Loc(), fmt.Sprintf("illegal default rule (arguments cannot contain %v)", TypeName(v)))
p.error(rule.Loc(), fmt.Sprintf("illegal default rule (arguments cannot contain %v)", ValueName(v)))
valid = false
return true
}
Expand Down
14 changes: 7 additions & 7 deletions v1/ast/parser_ext.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ func ParseRuleFromExpr(module *Module, expr *Expr) (*Rule, error) {
}
return ParsePartialSetDocRuleFromTerm(module, term)
default:
return nil, fmt.Errorf("%v cannot be used for rule name", TypeName(v))
return nil, fmt.Errorf("%v cannot be used for rule name", ValueName(v))
}
}

Expand Down Expand Up @@ -277,7 +277,7 @@ func ParseCompleteDocRuleFromEqExpr(module *Module, lhs, rhs *Term) (*Rule, erro
return nil, fmt.Errorf("ref not ground")
}
} else {
return nil, fmt.Errorf("%v cannot be used for rule name", TypeName(lhs.Value))
return nil, fmt.Errorf("%v cannot be used for rule name", ValueName(lhs.Value))
}
head.Value = rhs
head.Location = lhs.Location
Expand All @@ -299,7 +299,7 @@ func ParseCompleteDocRuleFromEqExpr(module *Module, lhs, rhs *Term) (*Rule, erro
func ParseCompleteDocRuleWithDotsFromTerm(module *Module, term *Term) (*Rule, error) {
ref, ok := term.Value.(Ref)
if !ok {
return nil, fmt.Errorf("%v cannot be used for rule name", TypeName(term.Value))
return nil, fmt.Errorf("%v cannot be used for rule name", ValueName(term.Value))
}

if _, ok := ref[0].Value.(Var); !ok {
Expand Down Expand Up @@ -328,7 +328,7 @@ func ParseCompleteDocRuleWithDotsFromTerm(module *Module, term *Term) (*Rule, er
func ParsePartialObjectDocRuleFromEqExpr(module *Module, lhs, rhs *Term) (*Rule, error) {
ref, ok := lhs.Value.(Ref)
if !ok {
return nil, fmt.Errorf("%v cannot be used as rule name", TypeName(lhs.Value))
return nil, fmt.Errorf("%v cannot be used as rule name", ValueName(lhs.Value))
}

if _, ok := ref[0].Value.(Var); !ok {
Expand Down Expand Up @@ -363,7 +363,7 @@ func ParsePartialSetDocRuleFromTerm(module *Module, term *Term) (*Rule, error) {

ref, ok := term.Value.(Ref)
if !ok || len(ref) == 1 {
return nil, fmt.Errorf("%vs cannot be used for rule head", TypeName(term.Value))
return nil, fmt.Errorf("%vs cannot be used for rule head", ValueName(term.Value))
}
if _, ok := ref[0].Value.(Var); !ok {
return nil, fmt.Errorf("invalid rule head: %v", ref)
Expand All @@ -373,7 +373,7 @@ func ParsePartialSetDocRuleFromTerm(module *Module, term *Term) (*Rule, error) {
if len(ref) == 2 {
v, ok := ref[0].Value.(Var)
if !ok {
return nil, fmt.Errorf("%vs cannot be used for rule head", TypeName(term.Value))
return nil, fmt.Errorf("%vs cannot be used for rule head", ValueName(term.Value))
}
// Modify the code to add the location to the head ref
// and set the head ref's jsonOptions.
Expand Down Expand Up @@ -408,7 +408,7 @@ func ParseRuleFromCallEqExpr(module *Module, lhs, rhs *Term) (*Rule, error) {

ref, ok := call[0].Value.(Ref)
if !ok {
return nil, fmt.Errorf("%vs cannot be used in function signature", TypeName(call[0].Value))
return nil, fmt.Errorf("%vs cannot be used in function signature", ValueName(call[0].Value))
}
if _, ok := ref[0].Value.(Var); !ok {
return nil, fmt.Errorf("invalid rule head: %v", ref)
Expand Down
36 changes: 36 additions & 0 deletions v1/ast/strings.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,39 @@ func TypeName(x interface{}) string {
}
return strings.ToLower(reflect.Indirect(reflect.ValueOf(x)).Type().Name())
}

// ValueName returns a human readable name for the AST Value type.
// This is preferrable over calling TypeName when the argument is known to be
// a Value, as this doesn't require reflection (= heap allocations).
func ValueName(x Value) string {
switch x.(type) {
case String:
return "string"
case Boolean:
return "boolean"
case Number:
return "number"
case Null:
return "null"
case Var:
return "var"
case Object:
return "object"
case Set:
return "set"
case Ref:
return "ref"
case Call:
return "call"
case *Array:
return "array"
case *ArrayComprehension:
return "arraycomprehension"
case *ObjectComprehension:
return "objectcomprehension"
case *SetComprehension:
return "setcomprehension"
}

return TypeName(x)
}
29 changes: 29 additions & 0 deletions v1/ast/strings_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package ast

import "testing"

// BenchmarkTypeName-10 32207775 38.93 ns/op 8 B/op 1 allocs/op
func BenchmarkTypeName(b *testing.B) {
term := StringTerm("foo")
b.ResetTimer()

for i := 0; i < b.N; i++ {
name := TypeName(term.Value)
if name != "string" {
b.Fatalf("expected string but got %v", name)
}
}
}

// BenchmarkValueName-10 508312227 2.374 ns/op 0 B/op 0 allocs/op
func BenchmarkValueName(b *testing.B) {
term := StringTerm("foo")
b.ResetTimer()

for i := 0; i < b.N; i++ {
name := ValueName(term.Value)
if name != "string" {
b.Fatalf("expected string but got %v", name)
}
}
}
Loading

0 comments on commit 75962f5

Please sign in to comment.