diff --git a/fgprof.go b/fgprof.go index dba1616..97787c2 100644 --- a/fgprof.go +++ b/fgprof.go @@ -20,14 +20,17 @@ func Start(w io.Writer, format Format) func() error { ticker := time.NewTicker(time.Second / hz) stopCh := make(chan struct{}) + prof := &profiler{} stackCounts := stackCounter{} + go func() { defer ticker.Stop() for { select { case <-ticker.C: - stackCounts.Update() + stacks := prof.GoroutineProfile() + stackCounts.Update(stacks) case <-stopCh: return } @@ -36,62 +39,108 @@ func Start(w io.Writer, format Format) func() error { return func() error { stopCh <- struct{}{} - return writeFormat(w, stackCounts, format, hz) + return writeFormat(w, stackCounts.HumanMap(prof.SelfFrame()), format, hz) } } -type stackCounter map[string]int +// profiler provides a convenient and performant way to access +// runtime.GoroutineProfile(). +type profiler struct { + stacks []runtime.StackRecord + selfFrame *runtime.Frame +} -func (s stackCounter) Update() { - // Determine the runtime.Frame of this func so we can hide it from our - // profiling output. - rpc := make([]uintptr, 1) - n := runtime.Callers(1, rpc) - if n < 1 { - panic("could not determine selfFrame") +// GoroutineProfile returns the stacks of all goroutines currently managed by +// the scheduler. This includes both goroutines that are currently running +// (On-CPU), as well as waiting (Off-CPU). +func (p *profiler) GoroutineProfile() []runtime.StackRecord { + if p.selfFrame == nil { + // Determine the runtime.Frame of this func so we can hide it from our + // profiling output. + rpc := make([]uintptr, 1) + n := runtime.Callers(1, rpc) + if n < 1 { + panic("could not determine selfFrame") + } + selfFrame, _ := runtime.CallersFrames(rpc).Next() + p.selfFrame = &selfFrame } - selfFrame, _ := runtime.CallersFrames(rpc).Next() - // COPYRIGHT: The code for populating `p` below is copied from - // writeRuntimeProfile in src/runtime/pprof/pprof.go. 
+ // We don't know how many goroutines exist, so we have to grow p.stacks + // dynamically. We overshoot by 10% since it's possible that more goroutines + // are launched in between two calls to GoroutineProfile. Once p.stacks + // reaches the maximum number of goroutines used by the program, it will get + // reused indefinitely, eliminating GoroutineProfile calls and allocations. // - // Find out how many records there are (GoroutineProfile(nil)), - // allocate that many records, and get the data. - // There's a race—more records might be added between - // the two calls—so allocate a few extra records for safety - // and also try again if we're very unlucky. - // The loop should only execute one iteration in the common case. - var p []runtime.StackRecord - n, ok := runtime.GoroutineProfile(nil) + // TODO(fg) There might be workloads where it would be nice to shrink + // p.stacks dynamically as well, but let's not over-engineer this until we + // understand those cases better. for { - // Allocate room for a slightly bigger profile, - // in case a few more entries have been added - // since the call to ThreadProfile. - p = make([]runtime.StackRecord, n+10) - n, ok = runtime.GoroutineProfile(p) - if ok { - p = p[0:n] - break + n, ok := runtime.GoroutineProfile(p.stacks) + if !ok { + p.stacks = make([]runtime.StackRecord, int(float64(n)*1.1)) + } else { + return p.stacks[0:n] } - // Profile grew; try again. } +} -outer: +func (p *profiler) SelfFrame() *runtime.Frame { + return p.selfFrame +} + +type stringStackCounter map[string]int + +func (s stringStackCounter) Update(p []runtime.StackRecord) { for _, pp := range p { frames := runtime.CallersFrames(pp.Stack()) var stack []string for { frame, more := frames.Next() + stack = append([]string{frame.Function}, stack...) 
+ if !more { break - } else if frame.Entry == selfFrame.Entry { - continue outer } + } + key := strings.Join(stack, ";") + s[key]++ + } +} + +type stackCounter map[[32]uintptr]int +func (s stackCounter) Update(p []runtime.StackRecord) { + for _, pp := range p { + s[pp.Stack0]++ + } +} + +// HumanMap resolves every raw program-counter stack into a ";"-joined list of +// function names (outermost caller first) and sums the counts of stacks that +// resolve to the same key. Stacks containing the exclude frame (if non-nil) are dropped. +// +// TODO(fg) create a better interface that avoids the pprof output having to +// split the stacks using the `;` separator. +func (s stackCounter) HumanMap(exclude *runtime.Frame) map[string]int { + m := map[string]int{} +outer: + for stack0, count := range s { + frames := runtime.CallersFrames((&runtime.StackRecord{Stack0: stack0}).Stack()) + + var stack []string + for { + frame, more := frames.Next() + if exclude != nil && frame.Entry == exclude.Entry { + continue outer + } stack = append([]string{frame.Function}, stack...) + if !more { + break + } } key := strings.Join(stack, ";") - s[key]++ + m[key] += count } + return m } diff --git a/fgprof_test.go b/fgprof_test.go index 33ca847..2c90c59 100644 --- a/fgprof_test.go +++ b/fgprof_test.go @@ -23,3 +23,20 @@ func TestStart(t *testing.T) { t.Fatalf("invalid output:\n%s", out) } } + +func BenchmarkProfiler(b *testing.B) { + prof := &profiler{} + for i := 0; i < b.N; i++ { + prof.GoroutineProfile() + } +} + +func BenchmarkStackCounter(b *testing.B) { + prof := &profiler{} + stacks := prof.GoroutineProfile() + sc := stackCounter{} + b.ResetTimer() + for i := 0; i < b.N; i++ { + sc.Update(stacks) + } +} diff --git a/format.go b/format.go index 1a351e3..448e0a2 100644 --- a/format.go +++ b/format.go @@ -20,7 +20,7 @@ const ( FormatPprof Format = "pprof" ) -func writeFormat(w io.Writer, s stackCounter, f Format, hz int) error { +func writeFormat(w io.Writer, s map[string]int, f Format, hz int) error { switch f { case FormatFolded: return writeFolded(w, s) @@ -31,7 +31,7 @@ func writeFormat(w io.Writer, s stackCounter, f Format, hz int) error { } } -func writeFolded(w io.Writer, s stackCounter) error { +func writeFolded(w io.Writer, s map[string]int) error { 
 for _, stack := range sortedKeys(s) { count := s[stack] if _, err := fmt.Fprintf(w, "%s %d\n", stack, count); err != nil { @@ -41,7 +41,7 @@ func writeFolded(w io.Writer, s stackCounter) error { return nil } -func toPprof(s stackCounter, hz int) *profile.Profile { +func toPprof(s map[string]int, hz int) *profile.Profile { functionID := uint64(1) locationID := uint64(1) line := int64(1) @@ -92,7 +92,7 @@ func toPprof(s stackCounter, hz int) *profile.Profile { return p } -func sortedKeys(s stackCounter) []string { +func sortedKeys(s map[string]int) []string { var keys []string for stack := range s { keys = append(keys, stack) diff --git a/pprof.go b/pprof.go index f0908e8..4312ea7 100644 --- a/pprof.go +++ b/pprof.go @@ -6,7 +6,7 @@ import ( "github.com/google/pprof/profile" ) -func toProfile(s stackCounter, hz int) *profile.Profile { +func toProfile(s map[string]int, hz int) *profile.Profile { functionID := uint64(1) locationID := uint64(1) diff --git a/pprof_test.go b/pprof_test.go index ad5bc5b..23371e2 100644 --- a/pprof_test.go +++ b/pprof_test.go @@ -6,7 +6,7 @@ import ( ) func Test_toProfile(t *testing.T) { - s := stackCounter{ + s := map[string]int{ "foo;bar": 2, "foo": 1, }