btf: replace postorderIterator with recursion
Throw out the painstakingly hand-optimized postorder iterator in favour of
a simple recursive function. It turns out this can be a lot faster for larger
types, probably because the visited map can be allocated on the stack.

There is a small hit for very simple types, but it doesn't seem to affect
the overall benchmarks much.
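
The stack-allocation effect can be illustrated with a toy example (the
types and names below are illustrative, not part of this commit). A map
that never escapes the call tree that creates it is eligible for stack
allocation; building with go build -gcflags=-m reports such maps as
"make(...) does not escape":

    // node is a stand-in for btf.Type in this sketch.
    type node struct{ children []*node }

    // visit mirrors the shape of the new visitInPostorder: it reads and
    // writes the visited map but never stores it anywhere, so the map
    // does not escape to the heap.
    func visit(n *node, visited map[*node]struct{}) {
        if _, ok := visited[n]; ok {
            return
        }
        visited[n] = struct{}{}
        for _, c := range n.children {
            visit(c, visited)
        }
    }

    func countReachable(root *node) int {
        visited := make(map[*node]struct{}) // candidate for stack allocation
        visit(root, visited)
        return len(visited)
    }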

    core: 1
    goos: linux
    goarch: amd64
    pkg: github.com/cilium/ebpf/btf
    cpu: 12th Gen Intel(R) Core(TM) i7-1260P
                                        │ base-po.txt │           recursive.txt            │
                                        │   sec/op    │   sec/op     vs base               │
    PostorderTraversal/single_type           32.22n ± 0%   39.69n ± 0%  +23.18% (p=0.002 n=6)
    PostorderTraversal/cycle(1)              576.3n ± 3%   112.3n ± 0%  -80.51% (p=0.002 n=6)
    PostorderTraversal/cycle(10)             2.807µ ± 1%   1.578µ ± 1%  -43.80% (p=0.002 n=6)
    PostorderTraversal/gov_update_cpu_data   2.039m ± 1%   1.795m ± 1%  -11.97% (p=0.002 n=6)
    geomean                                  3.211µ        1.885µ       -41.30%

                                        │  base-po.txt   │              recursive.txt               │
                                        │      B/op      │     B/op      vs base                    │
    PostorderTraversal/single_type             0.000 ± 0%       0.000 ± 0%         ~ (p=1.000 n=6) ¹
    PostorderTraversal/cycle(1)                264.0 ± 0%         0.0 ± 0%  -100.00% (p=0.002 n=6)
    PostorderTraversal/cycle(10)               716.5 ± 0%       326.0 ± 0%   -54.50% (p=0.002 n=6)
    PostorderTraversal/gov_update_cpu_data   345.1Ki ± 0%     334.8Ki ± 0%    -2.98% (p=0.002 n=6)
    geomean                                               ²                 ?                      ² ³
    ¹ all samples are equal
    ² summaries must be >0 to compute geomean
    ³ ratios must be >0 to compute geomean

                                        │ base-po.txt  │             recursive.txt              │
                                        │  allocs/op   │ allocs/op   vs base                    │
    PostorderTraversal/single_type           0.000 ± 0%     0.000 ± 0%         ~ (p=1.000 n=6) ¹
    PostorderTraversal/cycle(1)              4.000 ± 0%     0.000 ± 0%  -100.00% (p=0.002 n=6)
    PostorderTraversal/cycle(10)             7.000 ± 0%     1.000 ± 0%   -85.71% (p=0.002 n=6)
    PostorderTraversal/gov_update_cpu_data   132.0 ± 1%     115.0 ± 1%   -12.88% (p=0.002 n=6)
    geomean                                             ²               ?                      ² ³
    ¹ all samples are equal
    ² summaries must be >0 to compute geomean
    ³ ratios must be >0 to compute geomean
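
The tables above are benchstat output. A typical way to reproduce such a
comparison, assuming the benchmark name from the first column and the
file names from the column headers:

    go test -run '^$' -bench PostorderTraversal -count 6 ./btf > base-po.txt
    # apply this change, then
    go test -run '^$' -bench PostorderTraversal -count 6 ./btf > recursive.txt
    benchstat base-po.txt recursive.txt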

Signed-off-by: Lorenz Bauer <lmb@isovalent.com>
lmb committed Mar 27, 2024
1 parent a99904d commit f631fcc
Showing 6 changed files with 129 additions and 157 deletions.
62 changes: 28 additions & 34 deletions btf/marshal.go
@@ -5,6 +5,7 @@ import (
 	"encoding/binary"
 	"errors"
 	"fmt"
+	"maps"
 	"math"
 	"slices"
 	"sync"
@@ -38,6 +39,7 @@ type encoder struct {
 	buf     *bytes.Buffer
 	strings *stringTableBuilder
 	ids     map[Type]TypeID
+	visited map[Type]struct{}
 	lastID  TypeID
 }

@@ -163,15 +165,15 @@ func (b *Builder) Marshal(buf []byte, opts *MarshalOptions) ([]byte, error) {
 		buf:     w,
 		strings: stb,
 		lastID:  TypeID(len(b.types)),
-		ids:     make(map[Type]TypeID, len(b.types)),
+		visited: make(map[Type]struct{}, len(b.types)),
+		ids:     maps.Clone(b.stableIDs),
 	}
 
 	// Ensure that types are marshaled in the exact order they were Add()ed.
 	// Otherwise the ID returned from Add() won't match.
 	e.pending.Grow(len(b.types))
 	for _, typ := range b.types {
 		e.pending.Push(typ)
-		e.ids[typ] = b.stableIDs[typ]
 	}
 
 	if err := e.deflatePending(); err != nil {
@@ -218,16 +220,28 @@ func (b *Builder) addString(str string) (uint32, error) {
 	return b.strings.Add(str)
 }
 
-func (e *encoder) allocateID(typ Type) error {
-	id := e.lastID + 1
-	if id < e.lastID {
-		return errors.New("type ID overflow")
-	}
+func (e *encoder) allocateIDs(root Type) (err error) {
+	visitInPostorder(root, e.visited, func(typ Type) bool {
+		if _, ok := typ.(*Void); ok {
+			return true
+		}
 
-	e.pending.Push(typ)
-	e.ids[typ] = id
-	e.lastID = id
-	return nil
+		if _, ok := e.ids[typ]; ok {
+			return true
+		}
+
+		id := e.lastID + 1
+		if id < e.lastID {
+			err = errors.New("type ID overflow")
+			return false
+		}
+
+		e.pending.Push(typ)
+		e.ids[typ] = id
+		e.lastID = id
+		return true
+	})
+	return
 }
 
 // id returns the ID for the given type or panics with an error.
@@ -247,33 +261,13 @@ func (e *encoder) id(typ Type) TypeID {
 func (e *encoder) deflatePending() error {
 	// Declare root outside of the loop to avoid repeated heap allocations.
 	var root Type
-	skip := func(t Type) (skip bool) {
-		if t == root {
-			// Force descending into the current root type even if it already
-			// has an ID. Otherwise we miss children of types that have their
-			// ID pre-allocated via Add.
-			return false
-		}
-
-		_, isVoid := t.(*Void)
-		_, alreadyEncoded := e.ids[t]
-		return isVoid || alreadyEncoded
-	}
 
 	for !e.pending.Empty() {
 		root = e.pending.Shift()
 
 		// Allocate IDs for all children of typ, including transitive dependencies.
-		iter := postorderTraversal(root, skip)
-		for iter.Next() {
-			if iter.Type == root {
-				// The iterator yields root at the end, do not allocate another ID.
-				break
-			}
-
-			if err := e.allocateID(iter.Type); err != nil {
-				return err
-			}
+		if err := e.allocateIDs(root); err != nil {
+			return err
 		}
 
 		if err := e.deflateType(root); err != nil {
@@ -498,7 +492,7 @@ func (e *encoder) deflateEnum64(raw *rawType, enum *Enum) (err error) {
 	if enum.Signed {
 		placeholder.Encoding = Signed
 	}
-	if err := e.allocateID(placeholder); err != nil {
+	if err := e.allocateIDs(placeholder); err != nil {
 		return fmt.Errorf("add enum64 placeholder: %w", err)
 	}

11 changes: 3 additions & 8 deletions btf/marshal_test.go
@@ -79,16 +79,11 @@ func TestRoundtripVMlinux(t *testing.T) {
 		types[i+1], types[j+1] = types[j+1], types[i+1]
 	})
 
-	seen := make(map[Type]bool)
+	visited := make(map[Type]struct{})
 limitTypes:
 	for i, typ := range types {
-		iter := postorderTraversal(typ, func(t Type) (skip bool) {
-			return seen[t]
-		})
-		for iter.Next() {
-			seen[iter.Type] = true
-		}
-		if len(seen) >= math.MaxInt16 {
+		visitInPostorder(typ, visited, func(t Type) bool { return true })
+		if len(visited) >= math.MaxInt16 {
 			// IDs exceeding math.MaxUint16 can trigger a bug when loading BTF.
 			// This can be removed once the patch lands.
 			// See https://lore.kernel.org/bpf/20220909092107.3035-1-oss@lmb.io/
162 changes: 71 additions & 91 deletions btf/traversal.go
@@ -2,142 +2,122 @@ package btf
 
 import (
 	"fmt"
-
-	"github.com/cilium/ebpf/internal"
 )
 
 // Functions to traverse a cyclic graph of types. The below was very useful:
 // https://eli.thegreenplace.net/2015/directed-graph-traversal-orderings-and-applications-to-data-flow-analysis/#post-order-and-reverse-post-order
 
-type postorderIterator struct {
-	// Iteration skips types for which this function returns true.
-	skip func(Type) bool
-	// The root type. May be nil if skip(root) is true.
-	root Type
-
-	// Contains types which need to be either walked or yielded.
-	types typeDeque
-	// Contains a boolean whether the type has been walked or not.
-	walked internal.Deque[bool]
-	// The set of types which has been pushed onto types.
-	pushed map[Type]struct{}
-
-	// The current type. Only valid after a call to Next().
-	Type Type
-}
-
-// postorderTraversal iterates all types reachable from root by visiting the
-// leaves of the graph first.
+// Visit all types reachable from root in postorder.
 //
-// Types for which skip returns true are ignored. skip may be nil.
-func postorderTraversal(root Type, skip func(Type) (skip bool)) postorderIterator {
-	// Avoid allocations for the common case of a skipped root.
-	if skip != nil && skip(root) {
-		return postorderIterator{}
-	}
-
-	po := postorderIterator{root: root, skip: skip}
-	children(root, po.push)
-
-	return po
-}
-
-func (po *postorderIterator) push(t *Type) {
-	if _, ok := po.pushed[*t]; ok || *t == po.root {
-		return
+// Traversal stops if yield returns false.
+//
+// Returns false if traversal was aborted.
+func visitInPostorder(root Type, visited map[Type]struct{}, yield func(typ Type) bool) bool {
+	if _, ok := visited[root]; ok {
+		return true
 	}
-
-	if po.skip != nil && po.skip(*t) {
-		return
+	if visited == nil {
+		visited = make(map[Type]struct{})
 	}
+	visited[root] = struct{}{}
 
-	if po.pushed == nil {
-		// Lazily allocate pushed to avoid an allocation for Types without children.
-		po.pushed = make(map[Type]struct{})
+	cont := children(root, func(child *Type) bool {
+		return visitInPostorder(*child, visited, yield)
+	})
+	if !cont {
+		return false
 	}
 
-	po.pushed[*t] = struct{}{}
-	po.types.Push(t)
-	po.walked.Push(false)
+	return yield(root)
 }
 
-// Next returns true if there is another Type to traverse.
-func (po *postorderIterator) Next() bool {
-	for !po.types.Empty() {
-		t := po.types.Pop()
-
-		if !po.walked.Pop() {
-			// Push the type again, so that we re-evaluate it in done state
-			// after all children have been handled.
-			po.types.Push(t)
-			po.walked.Push(true)
-
-			// Add all direct children to todo.
-			children(*t, po.push)
-		} else {
-			// We've walked this type previously, so we now know that all
-			// children have been handled.
-			po.Type = *t
-			return true
-		}
-	}
-
-	// Only return root once.
-	po.Type, po.root = po.root, nil
-	return po.Type != nil
-}
-
-// children calls fn on each child of typ.
+// children calls yield on each child of typ.
 //
-// Iteration stops early if fn returns false.
-func children(typ Type, fn func(child *Type)) {
+// Traversal stops if yield returns false.
+//
+// Returns false if traversal was aborted.
+func children(typ Type, yield func(child *Type) bool) bool {
 	// Explicitly type switch on the most common types to allow the inliner to
 	// do its work. This avoids allocating intermediate slices from walk() on
 	// the heap.
 	switch v := typ.(type) {
 	case *Void, *Int, *Enum, *Fwd, *Float:
 		// No children to traverse.
 	case *Pointer:
-		fn(&v.Target)
+		if !yield(&v.Target) {
+			return false
+		}
 	case *Array:
-		fn(&v.Index)
-		fn(&v.Type)
+		if !yield(&v.Index) {
+			return false
+		}
+		if !yield(&v.Type) {
+			return false
+		}
 	case *Struct:
 		for i := range v.Members {
-			fn(&v.Members[i].Type)
+			if !yield(&v.Members[i].Type) {
+				return false
+			}
 		}
 	case *Union:
 		for i := range v.Members {
-			fn(&v.Members[i].Type)
+			if !yield(&v.Members[i].Type) {
+				return false
+			}
 		}
 	case *Typedef:
-		fn(&v.Type)
+		if !yield(&v.Type) {
+			return false
+		}
 	case *Volatile:
-		fn(&v.Type)
+		if !yield(&v.Type) {
+			return false
+		}
 	case *Const:
-		fn(&v.Type)
+		if !yield(&v.Type) {
+			return false
+		}
 	case *Restrict:
-		fn(&v.Type)
+		if !yield(&v.Type) {
+			return false
+		}
 	case *Func:
-		fn(&v.Type)
+		if !yield(&v.Type) {
+			return false
+		}
 	case *FuncProto:
-		fn(&v.Return)
+		if !yield(&v.Return) {
+			return false
+		}
 		for i := range v.Params {
-			fn(&v.Params[i].Type)
+			if !yield(&v.Params[i].Type) {
+				return false
+			}
 		}
 	case *Var:
-		fn(&v.Type)
+		if !yield(&v.Type) {
+			return false
+		}
 	case *Datasec:
		for i := range v.Vars {
-			fn(&v.Vars[i].Type)
+			if !yield(&v.Vars[i].Type) {
+				return false
+			}
 		}
 	case *declTag:
-		fn(&v.Type)
+		if !yield(&v.Type) {
+			return false
+		}
 	case *typeTag:
-		fn(&v.Type)
+		if !yield(&v.Type) {
+			return false
+		}
 	case *cycle:
 		// cycle has children, but we ignore them deliberately.
 	default:
 		panic(fmt.Sprintf("don't know how to walk Type %T", v))
 	}
+
+	return true
 }
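
For orientation, a minimal sketch of how the new entry point is used from
inside package btf (postorderNames is a hypothetical helper, not part of
this commit). Passing a nil visited map lets visitInPostorder allocate
one lazily:

    // postorderNames records the dynamic type of every Type reachable
    // from root, dependencies before dependents.
    func postorderNames(root Type) []string {
        var names []string
        visitInPostorder(root, nil, func(typ Type) bool {
            names = append(names, fmt.Sprintf("%T", typ))
            return true // keep traversing
        })
        return names
    }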