From cbab8dfc18b0099b5239365ee5973a19dcacf7ae Mon Sep 17 00:00:00 2001 From: Rodolfo Carvalho Date: Fri, 27 Nov 2020 19:06:10 +0100 Subject: [PATCH] feat: Initial tracing implementation This adds the StartSpan function and related APIs to the SDK. The initial support focuses on manual instrumentation and HTTP servers based on net/http. Tracing is opt-in. Use one of the new options, TracesSampleRate or TracesSampler, when initializing the SDK to enable sending transactions and spans to Sentry. The tracing APIs rely heavily on the standard Context type from Go, and integrate with the SDKs notion of scopes. See example/http/main.go for an example of how the new APIs are meant to be used in practice. While the basic functionality should be in place, more features are planned for later. --- README.md | 87 +---- client.go | 4 + doc.go | 25 ++ example/http/main.go | 97 +++++- http/sentryhttp.go | 13 +- http/sentryhttp_test.go | 5 + hub.go | 9 + internal/crypto/randutil/randutil.go | 23 ++ internal/crypto/randutil/randutil_test.go | 16 + scope.go | 14 +- span_recorder.go | 57 ++++ traces_sampler.go | 56 +++ traces_sampler_test.go | 81 +++++ tracing.go | 398 +++++++++++++++++++++- tracing_test.go | 234 +++++++++++++ transport.go | 2 + util.go | 13 +- 17 files changed, 1028 insertions(+), 106 deletions(-) create mode 100644 internal/crypto/randutil/randutil.go create mode 100644 internal/crypto/randutil/randutil_test.go create mode 100644 span_recorder.go create mode 100644 traces_sampler.go create mode 100644 traces_sampler_test.go diff --git a/README.md b/README.md index 0a0d7fa1e..4b111476b 100644 --- a/README.md +++ b/README.md @@ -65,88 +65,13 @@ More on this in the [Configuration section of the official Sentry Go SDK documen ## Usage -The SDK must be initialized with a call to `sentry.Init`. The default transport -is asynchronous and thus most programs should call `sentry.Flush` to wait until -buffered events are sent to Sentry right before the program terminates. - -Typically, `sentry.Init` is called in the beginning of `func main` and -`sentry.Flush` is [deferred](https://golang.org/ref/spec#Defer_statements) right -after. - -> Note that if the program terminates with a call to -> [`os.Exit`](https://golang.org/pkg/os/#Exit), either directly or indirectly -> via another function like `log.Fatal`, deferred functions are not run. -> -> In that case, and if it is important for you to report outstanding events -> before terminating the program, arrange for `sentry.Flush` to be called before -> the program terminates. - -Example: - -```go -// This is an example program that makes an HTTP request and prints response -// headers. Whenever a request fails, the error is reported to Sentry. -// -// Try it by running: -// -// go run main.go -// go run main.go https://sentry.io -// go run main.go bad-url -// -// To actually report events to Sentry, set the DSN either by editing the -// appropriate line below or setting the environment variable SENTRY_DSN to -// match the DSN of your Sentry project. -package main - -import ( - "fmt" - "log" - "net/http" - "os" - "time" - - "github.com/getsentry/sentry-go" -) - -func main() { - if len(os.Args) < 2 { - log.Fatalf("usage: %s URL", os.Args[0]) - } - - err := sentry.Init(sentry.ClientOptions{ - // Either set your DSN here or set the SENTRY_DSN environment variable. - Dsn: "", - // Enable printing of SDK debug messages. - // Useful when getting started or trying to figure something out. - Debug: true, - }) - if err != nil { - log.Fatalf("sentry.Init: %s", err) - } - // Flush buffered events before the program terminates. - // Set the timeout to the maximum duration the program can afford to wait. - defer sentry.Flush(2 * time.Second) - - resp, err := http.Get(os.Args[1]) - if err != nil { - sentry.CaptureException(err) - log.Printf("reported to Sentry: %s", err) - return - } - defer resp.Body.Close() - - for header, values := range resp.Header { - for _, value := range values { - fmt.Printf("%s=%s\n", header, value) - } - } -} -``` +The SDK supports reporting errors and tracking application performance. + +To get started, have a look at one of our [examples](example/): +- [Basic error instrumentation](example/basic/main.go) +- [Error and tracing for HTTP servers](example/http/main.go) -For your convenience, this example is available at -[`example/basic/main.go`](example/basic/main.go). -There are also more examples in the -[example](example) directory. +We also provide a [complete API reference](https://pkg.go.dev/github.com/getsentry/sentry-go). For more detailed information about how to get the most out of `sentry-go`, checkout the official documentation: diff --git a/client.go b/client.go index 6b47c0790..9caba0a41 100644 --- a/client.go +++ b/client.go @@ -119,6 +119,10 @@ type ClientOptions struct { // 0.0 is treated as if it was 1.0. To drop all events, set the DSN to the // empty string. SampleRate float64 + // The sample rate for sampling traces in the range [0.0, 1.0]. + TracesSampleRate float64 + // Used to customize the sampling of traces, overrides TracesSampleRate. + TracesSampler TracesSampler // List of regexp strings that will be used to match against event's message // and if applicable, caught errors type and value. // If the match is found, then a whole event will be dropped. diff --git a/doc.go b/doc.go index d07ce2f3c..36059f507 100644 --- a/doc.go +++ b/doc.go @@ -1,6 +1,9 @@ /* Package sentry is the official Sentry SDK for Go. +Use it to report errors and track application performance through distributed +tracing. + For more information about Sentry and SDK features please have a look at the documentation site https://docs.sentry.io/platforms/go/. @@ -17,6 +20,28 @@ Sentry project. This step is accomplished through a call to sentry.Init. A more detailed yet simple example is available at https://github.com/getsentry/sentry-go/blob/master/example/basic/main.go. +Error Reporting + +The Capture* functions report messages and errors to Sentry. + + sentry.CaptureMessage(...) + sentry.CaptureException(...) + sentry.CaptureException(...) + +Use similarly named functions in the Hub for concurrent programs like web +servers. + +Performance Monitoring + +You can use Sentry to monitor your application's performance. More information +on the product page https://docs.sentry.io/product/performance/. + +The StartSpan function creates new spans. + + span := sentry.StartSpan(ctx, "operation") + ... + span.Finish() + Integrations The SDK has support for several Go frameworks, available as subpackages. diff --git a/example/http/main.go b/example/http/main.go index fa764353a..a3c98774b 100644 --- a/example/http/main.go +++ b/example/http/main.go @@ -1,5 +1,5 @@ -// This is an example web server to demonstrate how to instrument web servers -// with Sentry. +// This is an example web server to demonstrate how to instrument error and +// performance monitoring with Sentry. // // Try it by running: // @@ -20,6 +20,7 @@ import ( "image/png" "log" "net/http" + "sync" "time" "github.com/getsentry/sentry-go" @@ -51,6 +52,15 @@ func run() error { log.Printf("BeforeSend event [%s]", event.EventID) return event }, + // Specify either TracesSampleRate or set a TracesSampler to enable + // tracing. + // TracesSampleRate: 0.5, + TracesSampler: sentry.TracesSamplerFunc(func(ctx sentry.SamplingContext) bool { + // Sample all transactions for testing. On production, use + // TracesSampleRate with a rate adequate for your traffic, or use + // the ctx in TracesSampler to customize sampling per-transaction.. + return true + }), }) if err != nil { return err @@ -60,26 +70,60 @@ func run() error { defer sentry.Flush(2 * time.Second) // Main HTTP handler, renders an HTML page with a random image. + // + // A new transaction is automatically sent to Sentry when the handler is + // invoked. http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + // Use GetHubFromContext to get a hub associated with the + // current request. Hubs provide data isolation, such that tags, + // breadcrumbs and other attributes are never mixed up across + // requests. + hub := sentry.GetHubFromContext(r.Context()) if r.URL.Path != "/" { - // Use GetHubFromContext to get a hub associated with the current - // request. Hubs provide data isolation, such that tags, breadcrumbs - // and other attributes are never mixed up across requests. - hub := sentry.GetHubFromContext(r.Context()) hub.Scope().SetTag("url", r.URL.Path) hub.CaptureMessage("Page Not Found") http.NotFound(w, r) return } - err := t.Execute(w, time.Now().UnixNano()) - if err != nil { - log.Printf("[%s] %s", r.URL.Path, err) - return - } + // Set a custom transaction name: use "Home" instead of the + // default "/" based on r.URL.Path. + hub.Scope().SetTransaction("Home") + + // The next block of code shows how to instrument concurrent + // tasks. + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + span := sentry.StartSpan(r.Context(), "template.execute") + defer span.Finish() + err := t.Execute(w, time.Now().UnixNano()) + if err != nil { + log.Printf("[%s] %s", r.URL.Path, err) + return + } + }() + go func() { + defer wg.Done() + span := sentry.StartSpan(r.Context(), "sleep") + defer span.Finish() + // For demonstration only, ensure homepage loading takes + // at least 40ms. + time.Sleep(40 * time.Millisecond) + }() + wg.Wait() }) // HTTP handler for the random image. + // + // A new transaction is automatically sent to Sentry when the handler is + // invoked. We use sentry.StartSpan and span.Finish to create additional + // child spans measuring specific parts of the image computation. + // + // In general, wrap potentially slow parts of your handlers (external + // network calls, CPU-intensive tasks, etc) to help identify where time is + // spent. http.HandleFunc("/random.png", func(w http.ResponseWriter, r *http.Request) { ctx := r.Context() var cancel context.CancelFunc @@ -90,9 +134,15 @@ func run() error { } q := r.URL.Query().Get("q") - img := NewImage(ctx, 128, 128, []byte(q)) + span := sentry.StartSpan(ctx, "NewImage") + img := NewImage(span.Context(), 128, 128, []byte(q)) + span.Finish() + + span = sentry.StartSpan(ctx, "png.Encode") err := png.Encode(w, img) + span.Finish() + if err != nil { log.Printf("[%s] %s", r.URL.Path, err) hub := sentry.GetHubFromContext(ctx) @@ -110,7 +160,8 @@ func run() error { log.Printf("Serving http://%s", *addr) - // Wrap the default mux with Sentry to capture panics and report errors. + // Wrap the default mux with Sentry to capture panics, report errors and + // measure performance. // // Alternatively, you can also wrap individual handlers if you need to use // different options for different parts of your app. @@ -144,15 +195,35 @@ img { // NewImage returns a random image based on seed, with the given width and // height. +// +// NewImage uses the context to create spans that measure the performance of its +// internal parts. func NewImage(ctx context.Context, width, height int, seed []byte) image.Image { + span := sentry.StartSpan(ctx, "sha256") b := sha256.Sum256(seed) + span.Finish() img := image.NewGray(image.Rect(0, 0, width, height)) + span = sentry.StartSpan(ctx, "img") + defer span.Finish() for i := 0; i < len(img.Pix); i += len(b) { select { case <-ctx.Done(): // Context canceled, abort image generation. + + // Set a tag on the current span. + span.SetTag("canceled", "yes") + // Set a tag on the current transaction. + // + // Note that spans are not designed to be mutated from + // concurrent goroutines. If multiple goroutines may try + // to mutate a span/transaction, for example to set + // tags, use a mutex to synchronize changes, or use a + // channel to communicate the desired changes back into + // the goroutine where the span was created. + sentry.TransactionFromContext(ctx).SetTag("img.canceled", "yes") + // Spot the bug: the returned image cannot be encoded as PNG and // will cause an error that will be reported to Sentry. return img.SubImage(image.Rect(0, 0, 0, 0)) diff --git a/http/sentryhttp.go b/http/sentryhttp.go index 3957fd979..948ef940c 100644 --- a/http/sentryhttp.go +++ b/http/sentryhttp.go @@ -83,11 +83,20 @@ func (h *Handler) handle(handler http.Handler) http.HandlerFunc { hub := sentry.GetHubFromContext(ctx) if hub == nil { hub = sentry.CurrentHub().Clone() + ctx = sentry.SetHubOnContext(ctx, hub) } + span := sentry.StartSpan(ctx, "http.server", + sentry.TransactionName(r.URL.Path), + sentry.ContinueFromRequest(r), + ) + defer span.Finish() + r = r.WithContext(span.Context()) hub.Scope().SetRequest(r) - ctx = sentry.SetHubOnContext(ctx, hub) defer h.recoverWithSentry(hub, r) - handler.ServeHTTP(w, r.WithContext(ctx)) + // TODO(tracing): use custom response writer to intercept + // response. Use HTTP status to add tag to transaction; set span + // status. + handler.ServeHTTP(w, r) } } diff --git a/http/sentryhttp_test.go b/http/sentryhttp_test.go index 307c1fbef..b0e6d2667 100644 --- a/http/sentryhttp_test.go +++ b/http/sentryhttp_test.go @@ -43,6 +43,7 @@ func TestIntegration(t *testing.T) { "User-Agent": "Go-http-client/1.1", }, }, + Transaction: "/panic", }, }, { @@ -71,6 +72,7 @@ func TestIntegration(t *testing.T) { "User-Agent": "Go-http-client/1.1", }, }, + Transaction: "/post", }, }, { @@ -91,6 +93,7 @@ func TestIntegration(t *testing.T) { "User-Agent": "Go-http-client/1.1", }, }, + Transaction: "/get", }, }, { @@ -120,6 +123,7 @@ func TestIntegration(t *testing.T) { "User-Agent": "Go-http-client/1.1", }, }, + Transaction: "/post/large", }, }, { @@ -145,6 +149,7 @@ func TestIntegration(t *testing.T) { "User-Agent": "Go-http-client/1.1", }, }, + Transaction: "/post/body-ignored", }, }, } diff --git a/hub.go b/hub.go index a85d4a016..f055c5652 100644 --- a/hub.go +++ b/hub.go @@ -367,6 +367,15 @@ func GetHubFromContext(ctx context.Context) *Hub { return nil } +// hubFromContext returns either a hub stored in the context or the current hub. +// The return value is guaranteed to be non-nil, unlike GetHubFromContext. +func hubFromContext(ctx context.Context) *Hub { + if hub, ok := ctx.Value(HubContextKey).(*Hub); ok { + return hub + } + return currentHub +} + // SetHubOnContext stores given Hub instance on the Context struct and returns a new Context. func SetHubOnContext(ctx context.Context, hub *Hub) context.Context { return context.WithValue(ctx, HubContextKey, hub) diff --git a/internal/crypto/randutil/randutil.go b/internal/crypto/randutil/randutil.go new file mode 100644 index 000000000..c37cebc83 --- /dev/null +++ b/internal/crypto/randutil/randutil.go @@ -0,0 +1,23 @@ +package randutil + +import ( + "crypto/rand" + "encoding/binary" +) + +const ( + floatMax = 1 << 53 + floatMask = floatMax - 1 +) + +// Float64 returns a cryptographically secure random number in [0.0, 1.0). +func Float64() float64 { + // The implementation is, in essence: + // return float64(rand.Int63n(1<<53)) / (1<<53) + b := make([]byte, 8) + _, err := rand.Read(b) + if err != nil { + panic(err) + } + return float64(binary.LittleEndian.Uint64(b)&floatMask) / floatMax +} diff --git a/internal/crypto/randutil/randutil_test.go b/internal/crypto/randutil/randutil_test.go new file mode 100644 index 000000000..6f6a5a4a9 --- /dev/null +++ b/internal/crypto/randutil/randutil_test.go @@ -0,0 +1,16 @@ +package randutil + +import ( + "testing" +) + +func TestFloat64(t *testing.T) { + const total = 1 << 24 + for i := 0; i < total; i++ { + n := Float64() + if !(n >= 0 && n < 1) { + t.Fatalf("out of range [0.0, 1.0): %f", n) + } + } + // TODO: verify that distribution is uniform +} diff --git a/scope.go b/scope.go index d25522286..1fa6ce8c1 100644 --- a/scope.go +++ b/scope.go @@ -278,12 +278,20 @@ func (scope *Scope) SetLevel(level Level) { scope.level = level } -// SetTransaction sets new transaction name for the current transaction. -func (scope *Scope) SetTransaction(transactionName string) { +// SetTransaction sets the transaction name for the current transaction. +func (scope *Scope) SetTransaction(name string) { scope.mu.Lock() defer scope.mu.Unlock() - scope.transaction = transactionName + scope.transaction = name +} + +// Transaction returns the transaction name for the current transaction. +func (scope *Scope) Transaction() (name string) { + scope.mu.RLock() + defer scope.mu.RUnlock() + + return scope.transaction } // Clone returns a copy of the current scope with all data copied over. diff --git a/span_recorder.go b/span_recorder.go new file mode 100644 index 000000000..9e611ca62 --- /dev/null +++ b/span_recorder.go @@ -0,0 +1,57 @@ +package sentry + +import ( + "sync" +) + +// maxSpans limits the number of recorded spans per transaction. The limit is +// meant to bound memory usage and prevent too large transaction events that +// would be rejected by Sentry. +const maxSpans = 1000 + +// A spanRecorder stores a span tree that makes up a transaction. Safe for +// concurrent use. It is okay to add child spans from multiple goroutines. +type spanRecorder struct { + mu sync.Mutex + spans []*Span + overflowOnce sync.Once +} + +// record stores a span. The first stored span is assumed to be the root of a +// span tree. +func (r *spanRecorder) record(s *Span) { + r.mu.Lock() + defer r.mu.Unlock() + if len(r.spans) >= maxSpans { + r.overflowOnce.Do(func() { + root := r.spans[0] + Logger.Printf("Too many spans: dropping spans from transaction with TraceID=%s SpanID=%s limit=%d", + root.TraceID, root.SpanID, maxSpans) + }) + // TODO(tracing): mark the transaction event in some way to + // communicate that spans were dropped. + return + } + r.spans = append(r.spans, s) +} + +// root returns the first recorded span. Returns nil if none have been recorded. +func (r *spanRecorder) root() *Span { + r.mu.Lock() + defer r.mu.Unlock() + if len(r.spans) == 0 { + return nil + } + return r.spans[0] +} + +// children returns a list of all recorded spans, except the root. Returns nil +// if there are no children. +func (r *spanRecorder) children() []*Span { + r.mu.Lock() + defer r.mu.Unlock() + if len(r.spans) < 2 { + return nil + } + return r.spans[1:] +} diff --git a/traces_sampler.go b/traces_sampler.go new file mode 100644 index 000000000..391e5fd43 --- /dev/null +++ b/traces_sampler.go @@ -0,0 +1,56 @@ +package sentry + +import ( + "fmt" + + "github.com/getsentry/sentry-go/internal/crypto/randutil" +) + +// A TracesSampler makes sampling decisions for spans. +// +// In addition to the sampling context passed to the Sample method, +// implementations may keep and use internal state to make decisions. +// +// Sampling is one of the last steps when starting a new span, such that the +// sampler can inspect most of the state of the span to make a decision. +// +// Implementations must be safe for concurrent use by multiple goroutines. +type TracesSampler interface { + Sample(ctx SamplingContext) bool +} + +// A SamplingContext is passed to a TracesSampler to determine a sampling +// decision. +type SamplingContext struct { + Span *Span // The current span, always non-nil. + Parent *Span // The parent span, may be nil. +} + +// The TracesSamplerFunc type is an adapter to allow the use of ordinary +// functions as a TracesSampler. +type TracesSamplerFunc func(ctx SamplingContext) bool + +var _ TracesSampler = TracesSamplerFunc(nil) + +func (f TracesSamplerFunc) Sample(ctx SamplingContext) bool { + return f(ctx) +} + +// fixedRateSampler is a TracesSampler that samples root spans randomly at a +// uniform rate. +type fixedRateSampler struct { + Rate float64 +} + +var _ TracesSampler = (*fixedRateSampler)(nil) + +func (s *fixedRateSampler) Sample(ctx SamplingContext) bool { + if s.Rate < 0 || s.Rate > 1 { + panic(fmt.Errorf("sampling rate out of range [0.0, 1.0]: %f", s.Rate)) + } + return randutil.Float64() < s.Rate +} + +// TODO(tracing): implement and export basic TracesSampler implementations: +// parent-based, span ID / trace ID based, etc. It should be possible to compose +// parent-based with other samplers. diff --git a/traces_sampler_test.go b/traces_sampler_test.go new file mode 100644 index 000000000..e1f47b062 --- /dev/null +++ b/traces_sampler_test.go @@ -0,0 +1,81 @@ +package sentry + +import ( + "fmt" + "sync" + "testing" +) + +func TestFixedRateSampler(t *testing.T) { + ctx := NewTestContext(ClientOptions{}) + rootSpan := StartSpan(ctx, "root") + + s := &fixedRateSampler{} + + // TODO(tracing): for now the fixedRateSampler ignores the parent + // decision. Before exporting it, we may consider whether it should + // inherit the parent decision, or if we need to export two variants of + // the sampler, or have users compose samplers. + // t.Run("Inheritance", func(t *testing.T) { + // childSpan := rootSpan.StartChild("child") + // // The sample decision for a child span should inherit its parent decision. + // for want, parentDecision := range [...]Sampled{SampledUndefined, SampledFalse, SampledTrue} { + // rootSpan.Sampled = parentDecision + // for _, rate := range []float64{0.0, 0.25, 0.5, 0.75, 1.0} { + // s.Rate = rate + // if got := repeatedSample(s, SamplingContext{Span: childSpan, Parent: rootSpan}, 10000); got != float64(want) { + // t.Errorf("got childSpan sample rate %.2f, want %d", got, want) + // } + // } + // } + // }) + + t.Run("UniformRate", func(t *testing.T) { + // The sample decision for the root span should observe the configured + // rate. + tests := []struct { + Rate float64 + Tolerance float64 + }{ + {0.0, 0.0}, + {0.25, 0.1}, + {0.5, 0.1}, + {0.75, 0.1}, + {1.0, 0.0}, + } + for _, tt := range tests { + tt := tt + t.Run(fmt.Sprint(tt.Rate), func(t *testing.T) { + s.Rate = tt.Rate + got := repeatedSample(s, SamplingContext{Span: rootSpan}, 10000) + if got < tt.Rate*(1-tt.Tolerance) || got > tt.Rate*(1+tt.Tolerance) { + t.Errorf("got rootSpan sample rate %.2f, want %.2f±%.0f%%", got, tt.Rate, 100*tt.Tolerance) + } + }) + } + }) + + t.Run("Concurrency", func(t *testing.T) { + // This test is for use with -race to catch data races. + var wg sync.WaitGroup + s.Rate = 0.5 + for i := 0; i < 32; i++ { + wg.Add(1) + go func() { + defer wg.Done() + repeatedSample(s, SamplingContext{Span: rootSpan}, 10000) + }() + } + wg.Wait() + }) +} + +func repeatedSample(sampler TracesSampler, ctx SamplingContext, count int) (observedRate float64) { + var n float64 + for i := 0; i < count; i++ { + if sampler.Sample(ctx) { + n++ + } + } + return n / float64(count) +} diff --git a/tracing.go b/tracing.go index b59acd662..accf38c59 100644 --- a/tracing.go +++ b/tracing.go @@ -1,15 +1,23 @@ package sentry import ( + "context" + "crypto/rand" "encoding/hex" "encoding/json" + "fmt" + "net/http" + "regexp" + "strings" "time" ) // A Span is the building block of a Sentry transaction. Spans build up a tree // structure of timed operations. The span tree makes up a transaction event // that is sent to Sentry when the root span is finished. -type Span struct { +// +// Spans must be started with either StartSpan or Span.StartChild. +type Span struct { //nolint: maligned // prefer readability over optimal memory layout (see note below *) TraceID TraceID `json:"trace_id"` SpanID SpanID `json:"span_id"` ParentSpanID SpanID `json:"parent_span_id"` @@ -20,8 +28,35 @@ type Span struct { StartTime time.Time `json:"start_timestamp"` EndTime time.Time `json:"timestamp"` Data map[string]interface{} `json:"data,omitempty"` + + Sampled Sampled `json:"-"` + + // ctx is the context where the span was started. Always non-nil. + ctx context.Context + + // parent refers to the immediate local parent span. A remote parent span is + // only referenced by setting ParentSpanID. + parent *Span + + // isTransaction is true only for the root span of a local span tree. The + // root span is the first span started in a context. Note that a local root + // span may have a remote parent belonging to the same trace, therefore + // isTransaction depends on ctx and not on parent. + isTransaction bool + + // recorder stores all spans in a transaction. Guaranteed to be non-nil. + recorder *spanRecorder } +// (*) Note on maligned: +// +// We prefer readability over optimal memory layout. If we ever decide to +// reorder fields, we can use a tool: +// +// go run honnef.co/go/tools/cmd/structlayout -json . Span | go run honnef.co/go/tools/cmd/structlayout-optimize +// +// Other structs would deserve reordering as well, for example Event. + // TODO: make Span.Tags and Span.Data opaque types (struct{unexported []slice}). // An opaque type allows us to add methods and make it more convenient to use // than maps, because maps require careful nil checks to use properly or rely on @@ -29,6 +64,204 @@ type Span struct { // tags/data. For Span.Data, must gracefully handle values that cannot be // marshaled into JSON (see transport.go:getRequestBodyFromEvent). +// StartSpan starts a new span to describe an operation. The new span will be a +// child of the last span stored in ctx, if any. +// +// One or more options can be used to modify the span properties. Typically one +// option as a function literal is enough. Combining multiple options can be +// useful to define and reuse specific properties with named functions. +// +// Caller should call the Finish method on the span to mark its end. Finishing a +// root span sends the span and all of its children, recursively, as a +// transaction to Sentry. +func StartSpan(ctx context.Context, operation string, options ...SpanOption) *Span { + parent, hasParent := ctx.Value(spanContextKey{}).(*Span) + var span Span + span = Span{ + // defaults + Op: operation, + StartTime: time.Now(), + + ctx: context.WithValue(ctx, spanContextKey{}, &span), + parent: parent, + isTransaction: !hasParent, + } + if hasParent { + span.TraceID = parent.TraceID + } else { + // Implementation note: + // + // While math/rand is ~2x faster than crypto/rand (exact + // difference depends on hardware / OS), crypto/rand is probably + // fast enough and a safer choice. + // + // For reference, OpenTelemetry [1] uses crypto/rand to seed + // math/rand. AFAICT this approach does not preserve the + // properties from crypto/rand that make it suitable for + // cryptography. While it might be debatable whether those + // properties are important for us here, again, we're taking the + // safer path. + // + // See [2a] & [2b] for a discussion of some of the properties we + // obtain by using crypto/rand and [3a] & [3b] for why we avoid + // math/rand. + // + // Because the math/rand seed has only 64 bits (int64), if the + // first thing we do after seeding an RNG is to read in a random + // TraceID, there are only 2^64 possible values. Compared to + // UUID v4 that have 122 random bits, there is a much greater + // chance of collision [4a] & [4b]. + // + // [1]: https://github.com/open-telemetry/opentelemetry-go/blob/958041ddf619a128/sdk/trace/trace.go#L25-L31 + // [2a]: https://security.stackexchange.com/q/120352/246345 + // [2b]: https://security.stackexchange.com/a/120365/246345 + // [3a]: https://github.com/golang/go/issues/11871#issuecomment-126333686 + // [3b]: https://github.com/golang/go/issues/11871#issuecomment-126357889 + // [4a]: https://en.wikipedia.org/wiki/Universally_unique_identifier#Collisions + // [4b]: https://www.wolframalpha.com/input/?i=sqrt%282*2%5E64*ln%281%2F%281-0.5%29%29%29 + _, err := rand.Read(span.TraceID[:]) + if err != nil { + panic(err) + } + } + _, err := rand.Read(span.SpanID[:]) + if err != nil { + panic(err) + } + if hasParent { + span.ParentSpanID = parent.SpanID + } + + // Apply options to override defaults. + for _, option := range options { + option(&span) + } + + if span.sample() { + span.Sampled = SampledTrue + } else { + span.Sampled = SampledFalse + } + + if hasParent { + span.recorder = parent.spanRecorder() + } else { + span.recorder = &spanRecorder{} + } + span.recorder.record(&span) + + // Update scope so that all events include a trace context, allowing + // Sentry to correlate errors to transactions/spans. + hubFromContext(ctx).Scope().SetContext("trace", span.traceContext()) + + return &span +} + +// Finish sets the span's end time, unless already set. If the span is the root +// of a span tree, Finish sends the span tree to Sentry as a transaction. +func (s *Span) Finish() { + // TODO(tracing): maybe make Finish run at most once, such that + // (incorrectly) calling it twice never double sends to Sentry. + + if s.EndTime.IsZero() { + s.EndTime = monotonicTimeSince(s.StartTime) + } + if s.Sampled != SampledTrue { + return + } + event := s.toEvent() + if event == nil { + return + } + + // TODO(tracing): add breadcrumbs + // (see https://github.com/getsentry/sentry-python/blob/f6f3525f8812f609/sentry_sdk/tracing.py#L372) + + hub := hubFromContext(s.ctx) + if hub.Scope().Transaction() == "" { + Logger.Printf("Missing transaction name for span with op = %q", s.Op) + } + hub.CaptureEvent(event) +} + +// Context returns the context containing the span. +func (s *Span) Context() context.Context { return s.ctx } + +// StartChild starts a new child span. +// +// The call span.StartChild(operation, options...) is a shortcut for +// StartSpan(span.Context(), operation, options...). +func (s *Span) StartChild(operation string, options ...SpanOption) *Span { + return StartSpan(s.Context(), operation, options...) +} + +// SetTag sets a tag on the span. It is recommended to use SetTag instead of +// accessing the tags map directly as SetTag takes care of initializing the map +// when necessary. +func (s *Span) SetTag(name, value string) { + if s.Tags == nil { + s.Tags = make(map[string]string) + } + s.Tags[name] = value +} + +// TODO(tracing): maybe add shortcuts to get/set transaction name. Right now the +// transaction name is in the Scope, as it has existed there historically, prior +// to tracing. +// +// See Scope.Transaction() and Scope.SetTransaction(). +// +// func (s *Span) TransactionName() string +// func (s *Span) SetTransactionName(name string) + +// ToSentryTrace returns the trace propagation value used with the sentry-trace +// HTTP header. +func (s *Span) ToSentryTrace() string { + // TODO(tracing): add instrumentation for outgoing HTTP requests using + // ToSentryTrace. + var b strings.Builder + fmt.Fprintf(&b, "%s-%s", s.TraceID.Hex(), s.SpanID.Hex()) + switch s.Sampled { + case SampledTrue: + b.WriteString("-1") + case SampledFalse: + b.WriteString("-0") + } + return b.String() +} + +// sentryTracePattern matches either +// +// TRACE_ID - SPAN_ID +// [[:xdigit:]]{32}-[[:xdigit:]]{16} +// +// or +// +// TRACE_ID - SPAN_ID - SAMPLED +// [[:xdigit:]]{32}-[[:xdigit:]]{16}-[01] +var sentryTracePattern = regexp.MustCompile(`^([[:xdigit:]]{32})-([[:xdigit:]]{16})(?:-([01]))?$`) + +// updateFromSentryTrace parses a sentry-trace HTTP header (as returned by +// ToSentryTrace) and updates fields of the span. If the header cannot be +// recognized as valid, the span is left unchanged. +func (s *Span) updateFromSentryTrace(header []byte) { + m := sentryTracePattern.FindSubmatch(header) + if m == nil { + // no match + return + } + _, _ = hex.Decode(s.TraceID[:], m[1]) + _, _ = hex.Decode(s.ParentSpanID[:], m[2]) + if len(m[3]) != 0 { + switch m[3][0] { + case '0': + s.Sampled = SampledFalse + case '1': + s.Sampled = SampledTrue + } + } +} + func (s *Span) MarshalJSON() ([]byte, error) { // span aliases Span to allow calling json.Marshal without an infinite loop. // It preserves all fields while none of the attached methods. @@ -46,6 +279,78 @@ func (s *Span) MarshalJSON() ([]byte, error) { }) } +func (s *Span) sample() bool { + // https://develop.sentry.dev/sdk/unified-api/tracing/#sampling + // #1 explicit sampling decision via StartSpan options. + if s.Sampled != SampledUndefined { + return s.Sampled == SampledTrue + } + hub := hubFromContext(s.ctx) + var clientOptions ClientOptions + client := hub.Client() + if client != nil { + clientOptions = hub.Client().Options() + } + samplingContext := SamplingContext{Span: s, Parent: s.parent} + // #2 use TracesSampler from ClientOptions. + sampler := clientOptions.TracesSampler + if sampler != nil { + return sampler.Sample(samplingContext) + } + // #3 inherit parent decision. + if s.parent != nil { + return s.parent.Sampled == SampledTrue + } + // #4 uniform sampling using TracesSampleRate. + sampler = &fixedRateSampler{ + Rate: clientOptions.TracesSampleRate, + } + return sampler.Sample(samplingContext) +} + +func (s *Span) toEvent() *Event { + if !s.isTransaction { + return nil // only transactions can be transformed into events + } + hub := hubFromContext(s.ctx) + + children := s.recorder.children() + finished := make([]*Span, 0, len(children)) + for _, child := range children { + if child.EndTime.IsZero() { + Logger.Printf("Dropped unfinished span: Op=%q TraceID=%s SpanID=%s", child.Op, child.TraceID, child.SpanID) + continue + } + finished = append(finished, child) + } + + return &Event{ + Type: transactionType, + Transaction: hub.Scope().Transaction(), + Contexts: map[string]interface{}{ + "trace": s.traceContext(), + }, + Tags: s.Tags, + Timestamp: s.EndTime, + StartTime: s.StartTime, + Spans: finished, + } +} + +func (s *Span) traceContext() TraceContext { + return TraceContext{ + TraceID: s.TraceID, + SpanID: s.SpanID, + ParentSpanID: s.ParentSpanID, + Op: s.Op, + Description: s.Description, + Status: s.Status, + } +} + +// spanRecorder stores the span tree. Guaranteed to be non-nil. +func (s *Span) spanRecorder() *spanRecorder { return s.recorder } + // TraceID identifies a trace. type TraceID [16]byte @@ -82,8 +387,8 @@ func (id SpanID) MarshalText() ([]byte, error) { // Zero values of TraceID and SpanID used for comparisons. var ( - //nolint // zeroTraceID TraceID - zeroSpanID SpanID + zeroTraceID TraceID + zeroSpanID SpanID ) // SpanStatus is the status of a span. @@ -190,3 +495,90 @@ func (tc *TraceContext) MarshalJSON() ([]byte, error) { ParentSpanID: parentSpanID, }) } + +// Sampled signifies a sampling decision. +type Sampled int8 + +// The possible trace sampling decisions are: SampledFalse, SampledUndefined +// (default) and SampledTrue. +const ( + SampledFalse Sampled = -1 + iota + SampledUndefined + SampledTrue +) + +func (s Sampled) String() string { + switch s { + case SampledFalse: + return "SampledFalse" + case SampledUndefined: + return "SampledUndefined" + case SampledTrue: + return "SampledTrue" + default: + return fmt.Sprintf("SampledInvalid(%d)", s) + } +} + +// A SpanOption is a function that can modify the properties of a span. +type SpanOption func(s *Span) + +// The TransactionName option sets the name of the current transaction. +// +// A span tree has a single transaction name, therefore using this option when +// starting a span affects the span tree as a whole, potentially overwriting a +// name set previously. +func TransactionName(name string) SpanOption { + return func(s *Span) { + hubFromContext(s.Context()).Scope().SetTransaction(name) + } +} + +// ContinueFromRequest returns a span option that updates the span to continue +// an existing trace. If it cannot detect an existing trace in the request, the +// span will be left unchanged. +func ContinueFromRequest(r *http.Request) SpanOption { + return func(s *Span) { + trace := r.Header.Get("sentry-trace") + if trace == "" { + return + } + s.updateFromSentryTrace([]byte(trace)) + } +} + +// spanContextKey is used to store span values in contexts. +type spanContextKey struct{} + +// TransactionFromContext returns the root span of the current transaction. It +// returns nil if no transaction is tracked in the context. +func TransactionFromContext(ctx context.Context) *Span { + if span, ok := ctx.Value(spanContextKey{}).(*Span); ok { + return span.recorder.root() + } + return nil +} + +// spanFromContext returns the last span stored in the context or a dummy +// non-nil span. +// +// TODO(tracing): consider exporting this. Without this, users cannot retrieve a +// span from a context since spanContextKey is not exported. +// +// This can be added retroactively, and in the meantime think better whether it +// should return nil (like GetHubFromContext), always non-nil (like +// HubFromContext), or both: two exported functions. +// +// Note the equivalence: +// +// SpanFromContext(ctx).StartChild(...) === StartSpan(ctx, ...) +// +// Another viable alternative to exporting this is some form of shortcut to get +// the "current transaction", so users can set tags, etc on automatically +// instrumented spans/transactions. +func spanFromContext(ctx context.Context) *Span { + if span, ok := ctx.Value(spanContextKey{}).(*Span); ok { + return span + } + return nil +} diff --git a/tracing_test.go b/tracing_test.go index c82819ef4..bc8305211 100644 --- a/tracing_test.go +++ b/tracing_test.go @@ -2,11 +2,17 @@ package sentry import ( "bytes" + "context" "encoding/hex" "encoding/json" "fmt" + "net/http" "reflect" "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" ) func TraceIDFromHex(s string) TraceID { @@ -79,3 +85,231 @@ func testMarshalJSONOmitEmptyParentSpanID(t *testing.T, v interface{}) { t.Fatalf("missing parent_span_id: %s", b) } } + +func TestStartSpan(t *testing.T) { + transport := &TransportMock{} + ctx := NewTestContext(ClientOptions{ + Transport: transport, + }) + op := "test.op" + transaction := "Test Transaction" + description := "A Description" + status := SpanStatusOK + parentSpanID := SpanIDFromHex("f00db33f") + sampled := SampledTrue + startTime := time.Now() + endTime := startTime.Add(3 * time.Second) + span := StartSpan(ctx, op, + TransactionName(transaction), + func(s *Span) { + s.Description = description + s.Status = status + s.ParentSpanID = parentSpanID + s.Sampled = sampled + s.StartTime = startTime + s.EndTime = endTime + }, + ) + span.Finish() + + SpanCheck{ + Sampled: sampled, + RecorderLen: 1, + }.Check(t, span) + + events := transport.Events() + if got := len(events); got != 1 { + t.Fatalf("sent %d events, want 1", got) + } + want := &Event{ + Type: transactionType, + Transaction: transaction, + Contexts: map[string]interface{}{ + "trace": TraceContext{ + TraceID: span.TraceID, + SpanID: span.SpanID, + ParentSpanID: parentSpanID, + Op: op, + Description: description, + Status: status, + }, + }, + Tags: nil, + // TODO(tracing): Set Transaction.Data here or in + // Contexts.Trace, or somewhere else. Currently ignored. + Extra: nil, + Timestamp: endTime, + StartTime: startTime, + } + opts := cmp.Options{ + cmpopts.IgnoreFields(Event{}, + "Contexts", "EventID", "Level", "Platform", + "Sdk", "ServerName", + ), + cmpopts.EquateEmpty(), + } + if diff := cmp.Diff(want, events[0], opts); diff != "" { + t.Fatalf("Event mismatch (-want +got):\n%s", diff) + } + // Check trace context explicitly, as we ignored all contexts above to + // disregard other contexts. + if diff := cmp.Diff(want.Contexts["trace"], events[0].Contexts["trace"]); diff != "" { + t.Fatalf("TraceContext mismatch (-want +got):\n%s", diff) + } +} + +func TestStartChild(t *testing.T) { + ctx := NewTestContext(ClientOptions{TracesSampleRate: 1.0}) + span := StartSpan(ctx, "top", TransactionName("Test Transaction")) + child := span.StartChild("child") + child.Finish() + span.Finish() + + c := SpanCheck{ + Sampled: SampledTrue, + RecorderLen: 2, + } + c.Check(t, span) + c.Check(t, child) +} + +// testContextKey is used to store a value in a context so that we can check +// that SDK operations on that context preserve the original context values. +type testContextKey struct{} +type testContextValue struct{} + +func NewTestContext(options ClientOptions) context.Context { + client, err := NewClient(options) + if err != nil { + panic(err) + } + hub := NewHub(client, NewScope()) + ctx := context.WithValue(context.Background(), testContextKey{}, testContextValue{}) + return SetHubOnContext(ctx, hub) +} + +// A SpanCheck is a test helper describing span properties that can be checked +// with the Check method. +type SpanCheck struct { + Sampled Sampled + ZeroTraceID bool + ZeroSpanID bool + RecorderLen int +} + +func (c SpanCheck) Check(t *testing.T, span *Span) { + t.Helper() + + // Invariant: original context values are preserved + gotCtx := span.Context() + if _, ok := gotCtx.Value(testContextKey{}).(testContextValue); !ok { + t.Errorf("original context value lost") + } + // Invariant: SpanFromContext(span.Context) == span + if spanFromContext(gotCtx) != span { + t.Errorf("span not in its context") + } + + if got := span.TraceID == zeroTraceID; got != c.ZeroTraceID { + want := "zero" + if !c.ZeroTraceID { + want = "non-" + want + } + t.Errorf("got TraceID = %s, want %s", span.TraceID, want) + } + if got := span.SpanID == zeroSpanID; got != c.ZeroSpanID { + want := "zero" + if !c.ZeroSpanID { + want = "non-" + want + } + t.Errorf("got SpanID = %s, want %s", span.SpanID, want) + } + if got, want := span.Sampled, c.Sampled; got != want { + t.Errorf("got Sampled = %v, want %v", got, want) + } + + if got, want := len(span.spanRecorder().spans), c.RecorderLen; got != want { + t.Errorf("got %d spans in recorder, want %d", got, want) + } + + if span.StartTime.IsZero() { + t.Error("start time not set") + } + if span.EndTime.IsZero() { + t.Error("end time not set") + } + if span.EndTime.Before(span.StartTime) { + t.Error("end time before start time") + } +} + +func TestToSentryTrace(t *testing.T) { + tests := []struct { + span *Span + want string + }{ + {&Span{}, "00000000000000000000000000000000-0000000000000000"}, + {&Span{Sampled: SampledTrue}, "00000000000000000000000000000000-0000000000000000-1"}, + {&Span{Sampled: SampledFalse}, "00000000000000000000000000000000-0000000000000000-0"}, + {&Span{TraceID: TraceID{1}}, "01000000000000000000000000000000-0000000000000000"}, + {&Span{SpanID: SpanID{1}}, "00000000000000000000000000000000-0100000000000000"}, + } + for _, tt := range tests { + if got := tt.span.ToSentryTrace(); got != tt.want { + t.Errorf("got %q, want %q", got, tt.want) + } + } +} + +func TestContinueSpanFromRequest(t *testing.T) { + traceID := TraceIDFromHex("bc6d53f15eb88f4320054569b8c553d4") + spanID := SpanIDFromHex("b72fa28504b07285") + + for _, sampled := range []Sampled{SampledTrue, SampledFalse, SampledUndefined} { + sampled := sampled + t.Run(sampled.String(), func(t *testing.T) { + var s Span + hkey := http.CanonicalHeaderKey("sentry-trace") + hval := (&Span{ + TraceID: traceID, + SpanID: spanID, + Sampled: sampled, + }).ToSentryTrace() + header := http.Header{hkey: []string{hval}} + ContinueFromRequest(&http.Request{Header: header})(&s) + if s.TraceID != traceID { + t.Errorf("got %q, want %q", s.TraceID, traceID) + } + if s.ParentSpanID != spanID { + t.Errorf("got %q, want %q", s.ParentSpanID, spanID) + } + if s.Sampled != sampled { + t.Errorf("got %q, want %q", s.Sampled, sampled) + } + }) + } +} + +func TestSpanFromContext(t *testing.T) { + // SpanFromContext always returns a non-nil value, such that you can use + // it without nil checks. + // When no span was in the context, the returned value is a no-op. + // Calling StartChild on the no-op creates a valid transaction. + // SpanFromContext(ctx).StartChild(...) === StartSpan(ctx, ...) + + ctx := NewTestContext(ClientOptions{}) + span := spanFromContext(ctx) + + _ = span + + // SpanCheck{ + // ZeroTraceID: true, + // ZeroSpanID: true, + // }.Check(t, span) + + // // Should create a transaction + // child := span.StartChild("top") + // SpanCheck{ + // RecorderLen: 1, + // }.Check(t, child) +} diff --git a/transport.go b/transport.go index 11cdab5cf..2c9f81008 100644 --- a/transport.go +++ b/transport.go @@ -49,6 +49,8 @@ func getTLSConfig(options ClientOptions) *tls.Config { } func retryAfter(now time.Time, r *http.Response) time.Duration { + // TODO(tracing): handle x-sentry-rate-limits, separate rate limiting + // per data type (error event, transaction, etc). retryAfterHeader := r.Header["Retry-After"] if retryAfterHeader == nil { diff --git a/util.go b/util.go index 0b4b04baf..fe3b92693 100644 --- a/util.go +++ b/util.go @@ -6,6 +6,7 @@ import ( "encoding/json" "fmt" "os" + "time" ) func uuid() string { @@ -20,11 +21,15 @@ func uuid() string { } func fileExists(fileName string) bool { - if _, err := os.Stat(fileName); err != nil { - return false - } + _, err := os.Stat(fileName) + return err == nil +} - return true +// monotonicTimeSince replaces uses of time.Now() to take into account the +// monotonic clock reading stored in start, such that duration = end - start is +// unaffected by changes in the system wall clock. +func monotonicTimeSince(start time.Time) (end time.Time) { + return start.Add(time.Since(start)) } //nolint: deadcode, unused