Perf: improvements to terms and built-in functions (open-policy-agent…

…#7284) Having worked on performance improvements in OPA on the side for almost a month now, there's a lot of code piling up 😅 So much that a single PR would be way too much to review. Instead, I'm splitting the work into chunks, and will submit the next PR as soon as this one is merged. Using the same benchmark as before — Regal linting itself, these new changes in total reduce the number of allocations by ~13 million, and quite a substantial amount of evaluation time saved as well. This first PR is isolated to improvements to terms, values and built-ins, and saves ~3M allocations. The details can be found below for each change, and of course in the code :) **BenchmarkRegalLintingItself-10 Before** ``` 1885978209 ns/op 3497157312 B/op 69064779 allocs/op ``` **BenchmarkRegalLintingItself-10 After** ``` 1796255084 ns/op 3452379408 B/op 66126623 allocs/op ``` **Terms** - Use pointer receivers consistently for object and set types. This allows changing the sortGuard once lock from a pointer to a non-pointer type, which is really the biggest win performance-wise in this PR. - Comparisons happen all the time, so make sure these take the shortest path possible whenever, possible, such as when one type is compared to another value of the same type. Built-in functions: **Arrays** - Both `array.concat` and `array.slice` will now return the operand on operations where the result isn't different from the input operand (like when concatenating an empty array) instead of allocating a new term/value. **Strings** - Return operand on unchanged result rather than allocating new term/value. - Where applicable, have functions take a cheaper path when string is ASCII and we can avoid the cost of rune conversion. **Crypto** - Hashing functions now optimized, spending less than half the time compared to previously. **Objects** - Avoid heap allocating result boolean escaping its scope, and instead use the return value of the `Until` function. **HTTP** - Use interned terms for keys in configuration object, avoding allocating these each time `http.send` is invoked. **Globs** - Use read/write lock to avoid contention. Use package level vars for "constant" values, avoiding them to escape to the heap each invocation. **Not directly/only related to built-in functions** - Add `ValueName` function replacing the previous `TypeName` functions for getting the name of Value's without paying for `any` interface allocations. - Add a few more interned terms. Signed-off-by: Anders Eknert <anders@styra.com>
anderseknert · Jan 20, 2025 · 75962f5 · 75962f5
1 parent 304768b
commit 75962f5
Show file tree

Hide file tree

Showing 29 changed files with 634 additions and 304 deletions.
diff --git a/internal/edittree/edittree_test.go b/internal/edittree/edittree_test.go
@@ -885,7 +885,7 @@ func parsePath(path *ast.Term) (ast.Ref, error) {
 			pathSegments = append(pathSegments, term)
 		})
 	default:
-		return nil, builtins.NewOperandErr(2, "must be one of {set, array} containing string paths or array of path segments but got %v", ast.TypeName(p))
+		return nil, builtins.NewOperandErr(2, "must be one of {set, array} containing string paths or array of path segments but got %v", ast.ValueName(p))
 	}
 
 	return pathSegments, nil

diff --git a/internal/planner/planner.go b/internal/planner/planner.go
@@ -1519,7 +1519,7 @@ func (p *Planner) planValue(t ast.Value, loc *ast.Location, iter planiter) error
 		p.loc = loc
 		return p.planObjectComprehension(v, iter)
 	default:
-		return fmt.Errorf("%v term not implemented", ast.TypeName(v))
+		return fmt.Errorf("%v term not implemented", ast.ValueName(v))
 	}
 }
 

diff --git a/v1/ast/compare.go b/v1/ast/compare.go
@@ -151,14 +151,7 @@ func Compare(a, b interface{}) int {
 		}
 		return 1
 	case Var:
-		b := b.(Var)
-		if a.Equal(b) {
-			return 0
-		}
-		if a < b {
-			return -1
-		}
-		return 1
+		return VarCompare(a, b.(Var))
 	case Ref:
 		b := b.(Ref)
 		return termSliceCompare(a, b)
@@ -181,7 +174,7 @@ func Compare(a, b interface{}) int {
 		if cmp := Compare(a.Term, b.Term); cmp != 0 {
 			return cmp
 		}
-		return Compare(a.Body, b.Body)
+		return a.Body.Compare(b.Body)
 	case *ObjectComprehension:
 		b := b.(*ObjectComprehension)
 		if cmp := Compare(a.Key, b.Key); cmp != 0 {
@@ -190,13 +183,13 @@ func Compare(a, b interface{}) int {
 		if cmp := Compare(a.Value, b.Value); cmp != 0 {
 			return cmp
 		}
-		return Compare(a.Body, b.Body)
+		return a.Body.Compare(b.Body)
 	case *SetComprehension:
 		b := b.(*SetComprehension)
 		if cmp := Compare(a.Term, b.Term); cmp != 0 {
 			return cmp
 		}
-		return Compare(a.Body, b.Body)
+		return a.Body.Compare(b.Body)
 	case Call:
 		b := b.(Call)
 		return termSliceCompare(a, b)
@@ -394,3 +387,54 @@ func withSliceCompare(a, b []*With) int {
 	}
 	return 0
 }
+
+func VarCompare(a, b Var) int {
+	if a == b {
+		return 0
+	}
+	if a < b {
+		return -1
+	}
+	return 1
+}
+
+func TermValueCompare(a, b *Term) int {
+	return a.Value.Compare(b.Value)
+}
+
+func ValueEqual(a, b Value) bool {
+	// TODO(ae): why doesn't this work the same?
+	//
+	// case interface{ Equal(Value) bool }:
+	// 	   return v.Equal(b)
+	//
+	// When put on top, golangci-lint even flags the other cases as unreachable..
+	// but TestTopdownVirtualCache will have failing test cases when we replace
+	// the other cases with the above one.. 🤔
+	switch v := a.(type) {
+	case Null:
+		return v.Equal(b)
+	case Boolean:
+		return v.Equal(b)
+	case Number:
+		return v.Equal(b)
+	case String:
+		return v.Equal(b)
+	case Var:
+		return v.Equal(b)
+	case Ref:
+		return v.Equal(b)
+	case *Array:
+		return v.Equal(b)
+	}
+
+	return a.Compare(b) == 0
+}
+
+func RefCompare(a, b Ref) int {
+	return termSliceCompare(a, b)
+}
+
+func RefEqual(a, b Ref) bool {
+	return termSliceEqual(a, b)
+}
diff --git a/v1/ast/compile.go b/v1/ast/compile.go
@@ -5500,7 +5500,7 @@ func rewriteDeclaredAssignment(g *localVarGenerator, stack *localDeclaredVars, e
 				return true
 			}
 		}
-		errs = append(errs, NewError(CompileErr, t.Location, "cannot assign to %v", TypeName(t.Value)))
+		errs = append(errs, NewError(CompileErr, t.Location, "cannot assign to %v", ValueName(t.Value)))
 		return true
 	}
 

diff --git a/v1/ast/interning.go b/v1/ast/interning.go
@@ -15,6 +15,8 @@ var (
 
 	// since this is by far the most common negative number
 	minusOneTerm = &Term{Value: Number("-1")}
+
+	InternedNullTerm = &Term{Value: Null{}}
 )
 
 // InternedBooleanTerm returns an interned term with the given boolean value.
@@ -1090,3 +1092,7 @@ var intNumberTerms = [...]*Term{
 	{Value: Number("511")},
 	{Value: Number("512")},
 }
+
+var InternedEmptyString = StringTerm("")
+
+var InternedEmptyObject = ObjectTerm()
diff --git a/v1/ast/map.go b/v1/ast/map.go
@@ -31,7 +31,7 @@ func (vs *ValueMap) MarshalJSON() ([]byte, error) {
 	vs.Iter(func(k Value, v Value) bool {
 		tmp = append(tmp, map[string]interface{}{
 			"name":  k.String(),
-			"type":  TypeName(v),
+			"type":  ValueName(v),
 			"value": v,
 		})
 		return false

diff --git a/v1/ast/parser.go b/v1/ast/parser.go
@@ -591,7 +591,7 @@ func (p *Parser) parsePackage() *Package {
 			pkg.Path[0] = DefaultRootDocument.Copy().SetLocation(v[0].Location)
 			first, ok := v[0].Value.(Var)
 			if !ok {
-				p.errorf(v[0].Location, "unexpected %v token: expecting var", TypeName(v[0].Value))
+				p.errorf(v[0].Location, "unexpected %v token: expecting var", ValueName(v[0].Value))
 				return nil
 			}
 			pkg.Path[1] = StringTerm(string(first)).SetLocation(v[0].Location)
@@ -600,7 +600,7 @@ func (p *Parser) parsePackage() *Package {
 				case String:
 					pkg.Path[i] = v[i-1]
 				default:
-					p.errorf(v[i-1].Location, "unexpected %v token: expecting string", TypeName(v[i-1].Value))
+					p.errorf(v[i-1].Location, "unexpected %v token: expecting string", ValueName(v[i-1].Value))
 					return nil
 				}
 			}
@@ -643,7 +643,7 @@ func (p *Parser) parseImport() *Import {
 		case Ref:
 			for i := 1; i < len(v); i++ {
 				if _, ok := v[i].Value.(String); !ok {
-					p.errorf(v[i].Location, "unexpected %v token: expecting string", TypeName(v[i].Value))
+					p.errorf(v[i].Location, "unexpected %v token: expecting string", ValueName(v[i].Value))
 					return nil
 				}
 			}
@@ -1717,7 +1717,7 @@ func (p *Parser) parseRef(head *Term, offset int) (term *Term) {
 	case Var, *Array, Object, Set, *ArrayComprehension, *ObjectComprehension, *SetComprehension, Call:
 		// ok
 	default:
-		p.errorf(loc, "illegal ref (head cannot be %v)", TypeName(h))
+		p.errorf(loc, "illegal ref (head cannot be %v)", ValueName(h))
 	}
 
 	ref := []*Term{head}
@@ -2318,7 +2318,7 @@ func (p *Parser) validateDefaultRuleArgs(rule *Rule) bool {
 			switch v := x.Value.(type) {
 			case Var: // do nothing
 			default:
-				p.error(rule.Loc(), fmt.Sprintf("illegal default rule (arguments cannot contain %v)", TypeName(v)))
+				p.error(rule.Loc(), fmt.Sprintf("illegal default rule (arguments cannot contain %v)", ValueName(v)))
 				valid = false
 				return true
 			}

diff --git a/v1/ast/parser_ext.go b/v1/ast/parser_ext.go
@@ -186,7 +186,7 @@ func ParseRuleFromExpr(module *Module, expr *Expr) (*Rule, error) {
 			}
 			return ParsePartialSetDocRuleFromTerm(module, term)
 		default:
-			return nil, fmt.Errorf("%v cannot be used for rule name", TypeName(v))
+			return nil, fmt.Errorf("%v cannot be used for rule name", ValueName(v))
 		}
 	}
 
@@ -277,7 +277,7 @@ func ParseCompleteDocRuleFromEqExpr(module *Module, lhs, rhs *Term) (*Rule, erro
 			return nil, fmt.Errorf("ref not ground")
 		}
 	} else {
-		return nil, fmt.Errorf("%v cannot be used for rule name", TypeName(lhs.Value))
+		return nil, fmt.Errorf("%v cannot be used for rule name", ValueName(lhs.Value))
 	}
 	head.Value = rhs
 	head.Location = lhs.Location
@@ -299,7 +299,7 @@ func ParseCompleteDocRuleFromEqExpr(module *Module, lhs, rhs *Term) (*Rule, erro
 func ParseCompleteDocRuleWithDotsFromTerm(module *Module, term *Term) (*Rule, error) {
 	ref, ok := term.Value.(Ref)
 	if !ok {
-		return nil, fmt.Errorf("%v cannot be used for rule name", TypeName(term.Value))
+		return nil, fmt.Errorf("%v cannot be used for rule name", ValueName(term.Value))
 	}
 
 	if _, ok := ref[0].Value.(Var); !ok {
@@ -328,7 +328,7 @@ func ParseCompleteDocRuleWithDotsFromTerm(module *Module, term *Term) (*Rule, er
 func ParsePartialObjectDocRuleFromEqExpr(module *Module, lhs, rhs *Term) (*Rule, error) {
 	ref, ok := lhs.Value.(Ref)
 	if !ok {
-		return nil, fmt.Errorf("%v cannot be used as rule name", TypeName(lhs.Value))
+		return nil, fmt.Errorf("%v cannot be used as rule name", ValueName(lhs.Value))
 	}
 
 	if _, ok := ref[0].Value.(Var); !ok {
@@ -363,7 +363,7 @@ func ParsePartialSetDocRuleFromTerm(module *Module, term *Term) (*Rule, error) {
 
 	ref, ok := term.Value.(Ref)
 	if !ok || len(ref) == 1 {
-		return nil, fmt.Errorf("%vs cannot be used for rule head", TypeName(term.Value))
+		return nil, fmt.Errorf("%vs cannot be used for rule head", ValueName(term.Value))
 	}
 	if _, ok := ref[0].Value.(Var); !ok {
 		return nil, fmt.Errorf("invalid rule head: %v", ref)
@@ -373,7 +373,7 @@ func ParsePartialSetDocRuleFromTerm(module *Module, term *Term) (*Rule, error) {
 	if len(ref) == 2 {
 		v, ok := ref[0].Value.(Var)
 		if !ok {
-			return nil, fmt.Errorf("%vs cannot be used for rule head", TypeName(term.Value))
+			return nil, fmt.Errorf("%vs cannot be used for rule head", ValueName(term.Value))
 		}
 		// Modify the code to add the location to the head ref
 		// and set the head ref's jsonOptions.
@@ -408,7 +408,7 @@ func ParseRuleFromCallEqExpr(module *Module, lhs, rhs *Term) (*Rule, error) {
 
 	ref, ok := call[0].Value.(Ref)
 	if !ok {
-		return nil, fmt.Errorf("%vs cannot be used in function signature", TypeName(call[0].Value))
+		return nil, fmt.Errorf("%vs cannot be used in function signature", ValueName(call[0].Value))
 	}
 	if _, ok := ref[0].Value.(Var); !ok {
 		return nil, fmt.Errorf("invalid rule head: %v", ref)

diff --git a/v1/ast/strings.go b/v1/ast/strings.go
@@ -16,3 +16,39 @@ func TypeName(x interface{}) string {
 	}
 	return strings.ToLower(reflect.Indirect(reflect.ValueOf(x)).Type().Name())
 }
+
+// ValueName returns a human readable name for the AST Value type.
+// This is preferrable over calling TypeName when the argument is known to be
+// a Value, as this doesn't require reflection (= heap allocations).
+func ValueName(x Value) string {
+	switch x.(type) {
+	case String:
+		return "string"
+	case Boolean:
+		return "boolean"
+	case Number:
+		return "number"
+	case Null:
+		return "null"
+	case Var:
+		return "var"
+	case Object:
+		return "object"
+	case Set:
+		return "set"
+	case Ref:
+		return "ref"
+	case Call:
+		return "call"
+	case *Array:
+		return "array"
+	case *ArrayComprehension:
+		return "arraycomprehension"
+	case *ObjectComprehension:
+		return "objectcomprehension"
+	case *SetComprehension:
+		return "setcomprehension"
+	}
+
+	return TypeName(x)
+}
diff --git a/v1/ast/strings_bench_test.go b/v1/ast/strings_bench_test.go
@@ -0,0 +1,29 @@
+package ast
+
+import "testing"
+
+// BenchmarkTypeName-10    32207775	   38.93 ns/op    8 B/op    1 allocs/op
+func BenchmarkTypeName(b *testing.B) {
+	term := StringTerm("foo")
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		name := TypeName(term.Value)
+		if name != "string" {
+			b.Fatalf("expected string but got %v", name)
+		}
+	}
+}
+
+// BenchmarkValueName-10    508312227    2.374 ns/op    0 B/op    0 allocs/op
+func BenchmarkValueName(b *testing.B) {
+	term := StringTerm("foo")
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		name := ValueName(term.Value)
+		if name != "string" {
+			b.Fatalf("expected string but got %v", name)
+		}
+	}
+}