Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experimental support for tracing #16020

Merged
merged 8 commits into from
Nov 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ verify-codegen:

# protobuf regenerates Go code from the .proto definitions:
#  - pkg/otel/otlptracefile/pb/file.proto via --go_out, followed by goimports
#    rewriting the generated file.pb.go in place;
#  - protokube's gossip mesh.proto via --gogo_out, run from ${GOPATH_1ST}/src
#    (presumably the first GOPATH entry -- confirm where GOPATH_1ST is defined).
.PHONY: protobuf
protobuf:
protoc --go_out=. --go_opt=paths=source_relative pkg/otel/otlptracefile/pb/file.proto
go run golang.org/x/tools/cmd/goimports@latest -w pkg/otel/otlptracefile/pb/file.pb.go
cd ${GOPATH_1ST}/src; protoc --gogo_out=. k8s.io/kops/protokube/pkg/gossip/mesh/mesh.proto

.PHONY: hooks
Expand Down
3 changes: 3 additions & 0 deletions cmd/kops/create_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,9 @@ func NewCmdCreateCluster(f *util.Factory, out io.Writer) *cobra.Command {
}

func RunCreateCluster(ctx context.Context, f *util.Factory, out io.Writer, c *CreateClusterOptions) error {
ctx, span := tracer.Start(ctx, "RunCreateCluster")
defer span.End()

isDryrun := false
// direct requires --yes (others do not, because they don't make changes)
targetName := c.Target
Expand Down
21 changes: 21 additions & 0 deletions cmd/kops/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import "go.opentelemetry.io/otel"

// tracer is the OpenTelemetry tracer for the kops command package; the
// command implementations in this package start their spans from it.
var tracer = otel.Tracer("k8s.io/kops/cmd/kops")
40 changes: 38 additions & 2 deletions cmd/kops/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,45 @@ limitations under the License.

package main // import "k8s.io/kops/cmd/kops"

import "context"
import (
"context"
"fmt"
"os"

"k8s.io/kops"
)

func main() {
ctx := context.Background()
Execute(ctx)
if err := run(ctx); err != nil {
os.Exit(1)
}
}

func run(ctx context.Context) error {
// Set up OpenTelemetry.
serviceName := "kops"
serviceVersion := kops.Version
if kops.GitVersion != "" {
serviceVersion += ".git-" + kops.GitVersion
}

otelShutdown, err := setupOTelSDK(ctx, serviceName, serviceVersion)
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
return err
}
// Handle shutdown properly so nothing leaks.
defer func() {
// We use a background context because the main context has probably been shut down.
if err := otelShutdown(context.Background()); err != nil {
rifelpet marked this conversation as resolved.
Show resolved Hide resolved
fmt.Fprintf(os.Stderr, "error shutting down otel: %v\n", err)
}
}()

if err := Execute(ctx); err != nil {
return err
}

return nil
}
145 changes: 145 additions & 0 deletions cmd/kops/otel.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/*
Copyright 2023 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"context"
"errors"
"fmt"
"net/http"
"os"
"path/filepath"
"strings"
"time"

"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/sdk/resource"
"go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.21.0"

"k8s.io/kops/pkg/otel/otlptracefile"
)

// setupOTelSDK bootstraps the OpenTelemetry pipeline.
// If it does not return an error, make sure to call shutdown for proper cleanup.
// setupOTelSDK wires up the OpenTelemetry pipeline for this process.
//
// The returned shutdown function runs every cleanup that was registered during
// setup, joins their errors, and clears the list so each cleanup runs at most
// once. When setup fails, the cleanups registered so far are run immediately
// and their errors are joined with the setup error.
//
// If newTraceProvider yields no provider, nothing is installed globally and
// shutdown is a no-op.
func setupOTelSDK(ctx context.Context, serviceName, serviceVersion string) (shutdown func(context.Context) error, err error) {
	var cleanups []func(context.Context) error

	shutdown = func(ctx context.Context) error {
		var joined error
		for i := range cleanups {
			joined = errors.Join(joined, cleanups[i](ctx))
		}
		cleanups = nil
		return joined
	}

	// abort tears down whatever has been set up so far and reports both the
	// original failure and any teardown failures.
	abort := func(cause error) (func(context.Context) error, error) {
		return shutdown, errors.Join(cause, shutdown(ctx))
	}

	// Resource describing this service.
	res, resErr := newResource(serviceName, serviceVersion)
	if resErr != nil {
		return abort(resErr)
	}

	// Trace provider; nil means no trace destination was configured.
	provider, providerErr := newTraceProvider(ctx, res)
	if providerErr != nil {
		return abort(providerErr)
	}
	if provider == nil {
		return shutdown, nil
	}

	cleanups = append(cleanups, provider.Shutdown)
	otel.SetTracerProvider(provider)

	// Route requests made through the default HTTP client via the otelhttp transport.
	instrumented := otelhttp.NewTransport(http.DefaultTransport)
	http.DefaultClient = &http.Client{Transport: instrumented}

	return shutdown, nil
}

// newResource builds the OpenTelemetry resource for this process by merging
// the SDK's default resource with the given service name and version.
func newResource(serviceName, serviceVersion string) (*resource.Resource, error) {
	base := resource.Default()
	service := resource.NewWithAttributes(
		semconv.SchemaURL,
		semconv.ServiceName(serviceName),
		semconv.ServiceVersion(serviceVersion),
	)
	return resource.Merge(base, service)
}

// newTraceProvider builds a tracer provider that writes spans to a local file.
//
// The destination is taken from the first of these environment variables that
// is non-empty:
//
//	OTEL_EXPORTER_OTLP_TRACES_FILE  path of the trace file
//	OTEL_EXPORTER_OTLP_FILE         path of the trace file
//	OTEL_EXPORTER_OTLP_TRACES_DIR   directory; a per-process file is created inside
//	OTEL_EXPORTER_OTLP_DIR          directory; a per-process file is created inside
//
// If none is set, tracing is disabled and (nil, nil) is returned.
func newTraceProvider(ctx context.Context, res *resource.Resource) (*trace.TracerProvider, error) {
	// Listed in order of precedence.
	sources := []struct {
		envVar      string
		isDirectory bool
	}{
		{"OTEL_EXPORTER_OTLP_TRACES_FILE", false},
		{"OTEL_EXPORTER_OTLP_FILE", false},
		{"OTEL_EXPORTER_OTLP_TRACES_DIR", true},
		{"OTEL_EXPORTER_OTLP_DIR", true},
	}

	dest := ""
	destIsDirectory := false
	for _, src := range sources {
		if v := os.Getenv(src.envVar); v != "" {
			dest, destIsDirectory = v, src.isDirectory
			break
		}
	}
	if dest == "" {
		return nil, nil
	}

	// If we are writing to a directory, construct a (likely) unique name
	if destIsDirectory {
		if err := os.MkdirAll(dest, 0755); err != nil {
			return nil, fmt.Errorf("creating directories %q: %w", dest, err)
		}
		processName, err := os.Executable()
		if err != nil {
			return nil, fmt.Errorf("getting process name: %w", err)
		}
		processName = filepath.Base(processName)
		processName = strings.TrimSuffix(processName, ".exe")
		pid := os.Getpid()
		// Use a compact UTC timestamp without ':'; RFC 3339 timestamps contain
		// colons, which are not allowed in file names on Windows.
		timestamp := time.Now().UTC().Format("20060102T150405Z")
		filename := fmt.Sprintf("%s-%d-%s.otel", processName, pid, timestamp)
		dest = filepath.Join(dest, filename)
	}

	traceExporter, err := otlptracefile.New(ctx, otlptracefile.WithPath(dest))
	if err != nil {
		return nil, fmt.Errorf("creating trace exporter for %q: %w", dest, err)
	}

	traceProvider := trace.NewTracerProvider(
		trace.WithBatcher(traceExporter,
			// Default is 5s. Set to 1s for demonstrative purposes.
			trace.WithBatchTimeout(time.Second)),
		trace.WithResource(res),
	)
	return traceProvider, nil
}
11 changes: 7 additions & 4 deletions cmd/kops/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ import (

"github.com/spf13/cobra"
"github.com/spf13/viper"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"k8s.io/apimachinery/pkg/util/validation/field"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/client-go/util/homedir"
Expand Down Expand Up @@ -89,12 +91,13 @@ var rootCommand = RootCmd{
},
}

func Execute(ctx context.Context) {
func Execute(ctx context.Context) error {
ctx, span := tracer.Start(ctx, "kops", trace.WithAttributes(attribute.StringSlice("args", os.Args)))
defer span.End()

goflag.Set("logtostderr", "true")
goflag.CommandLine.Parse([]string{})
if err := rootCommand.cobraCommand.ExecuteContext(ctx); err != nil {
os.Exit(1)
}
return rootCommand.cobraCommand.ExecuteContext(ctx)
}

func init() {
Expand Down
23 changes: 23 additions & 0 deletions docs/opentelemetry.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# OpenTelemetry support

kOps is experimenting with initial support for OpenTelemetry, starting with tracing.

The support should be considered experimental; the trace file format and schema will likely change, and these initial experiments might be removed entirely.

kOps supports a "serverless" mode of operation, where it writes the OpenTelemetry trace output to a file. We do this because our e2e test runner (prow) doesn't yet have a destination for OpenTelemetry data.

To try this out:

`OTEL_EXPORTER_OTLP_TRACES_FILE=/tmp/trace go run ./cmd/kops get cluster`

You should now see that the /tmp/trace file is created.

Then we have an experimental tool to serve the trace file to jaeger:

```
cd tools/otel/traceserver
go run . --src /tmp/trace --run jaeger
```

Not everything is instrumented yet, and not all the traces are fully joined up (we need to thread more contexts through more methods),
but you should be able to start to explore the operations that we run and their performance.
19 changes: 19 additions & 0 deletions docs/releases/1.29-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,25 @@ This is a document to gather the release notes prior to the release.

# Significant changes

## Initial OpenTelemetry Support

We are starting to add (experimental) support for OpenTelemetry,
in particular Tracing support. Setting `OTEL_EXPORTER_OTLP_TRACES_FILE`
will write a trace file which can then be read by the traceserver program.
More information and options are described in [docs/opentelemetry.md](/docs/opentelemetry.md).
The tracing data is not expected to be particularly useful for end-users in
this release; the (non-standard) recording approach is instead intended to
work well with our Prow end-to-end testing system so that developers can
optimize kOps.

Please note: this is *not* telemetry in the "phone-home" sense.
The kOps project does not collect data from your machine. As an
open-source project we do not even want to collect any of your data.
Currently the only OpenTelemetry backend supported is writing to a
filesystem (and it is opt-in). In future you will be able to configure
other OpenTelemetry backends, but this data will only be sent if
you enable OpenTelemetry, and only sent to where you configure.

## AWS

* Network Load Balancers in front of the Kubernetes API and bastion hosts now
Expand Down
11 changes: 9 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ require (
github.com/spotinst/spotinst-sdk-go v1.145.0
github.com/stretchr/testify v1.8.4
github.com/weaveworks/mesh v0.0.0-20191105120815-58dbcc3e8e63
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0
go.opentelemetry.io/otel v1.19.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0
go.opentelemetry.io/otel/sdk v1.19.0
go.opentelemetry.io/otel/trace v1.19.0
go.opentelemetry.io/proto/otlp v1.0.0
go.uber.org/multierr v1.11.0
golang.org/x/crypto v0.15.0
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa
Expand Down Expand Up @@ -115,6 +121,7 @@ require (
github.com/evanphx/json-patch/v5 v5.6.0 // indirect
github.com/evertras/bubble-table v0.14.4 // indirect
github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d // indirect
github.com/felixge/httpsnoop v1.0.3 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/go-errors/errors v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
Expand All @@ -137,6 +144,7 @@ require (
github.com/googleapis/gax-go/v2 v2.12.0 // indirect
github.com/gorilla/mux v1.8.0 // indirect
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-immutable-radix v1.3.1 // indirect
Expand Down Expand Up @@ -209,9 +217,7 @@ require (
github.com/vbatts/tar-split v0.11.3 // indirect
github.com/xlab/treeprint v1.2.0 // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/otel v1.19.0 // indirect
go.opentelemetry.io/otel/metric v1.19.0 // indirect
go.opentelemetry.io/otel/trace v1.19.0 // indirect
go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect
golang.org/x/mod v0.14.0 // indirect
golang.org/x/term v0.14.0 // indirect
Expand All @@ -220,6 +226,7 @@ require (
golang.org/x/tools v0.15.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20231016165738-49dd2c1f3d0b // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20231030173426-d783a09b4405 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/warnings.v0 v0.1.2 // indirect
Expand Down
12 changes: 12 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,8 @@ github.com/golang-jwt/jwt/v4 v4.2.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzw
github.com/golang-jwt/jwt/v4 v4.5.0 h1:7cYmW1XlMY7h7ii7UhUyChSgS5wUJEnm9uZVTGqOWzg=
github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/glog v1.1.2 h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo=
github.com/golang/glog v1.1.2/go.mod h1:zR+okUeTbrL6EL3xHUDxZuEtGv04p5shwip1+mL/rLQ=
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
Expand Down Expand Up @@ -417,6 +419,8 @@ github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/ad
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 h1:+ngKgrYPPJrOjhax5N+uePQ0Fh1Z7PheYoUI/0nzkPA=
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
Expand Down Expand Up @@ -731,13 +735,21 @@ go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk=
go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E=
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 h1:x8Z78aZx8cOF0+Kkazoc7lwUNMGy0LrzEMxTm4BbTxg=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0/go.mod h1:62CPTSry9QZtOaSsE3tOzhx6LzDhHnXJ6xHeMNNiM6Q=
go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs=
go.opentelemetry.io/otel v1.19.0/go.mod h1:i0QyjOq3UPoTzff0PJB2N66fb4S0+rSbSB15/oyH9fY=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 h1:Mne5On7VWdx7omSrSSZvM4Kw7cS7NQkOOmLcgscI51U=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0/go.mod h1:IPtUMKL4O3tH5y+iXVyAXqpAwMuzC1IrxVS81rummfE=
go.opentelemetry.io/otel/metric v1.19.0 h1:aTzpGtV0ar9wlV4Sna9sdJyII5jTVJEvKETPiOKwvpE=
go.opentelemetry.io/otel/metric v1.19.0/go.mod h1:L5rUsV9kM1IxCj1MmSdS+JQAcVm319EUrDVLrt7jqt8=
go.opentelemetry.io/otel/sdk v1.19.0 h1:6USY6zH+L8uMH8L3t1enZPR3WFEmSTADlqldyHtJi3o=
go.opentelemetry.io/otel/sdk v1.19.0/go.mod h1:NedEbbS4w3C6zElbLdPJKOpJQOrGUJ+GfzpjUvI0v1A=
go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg=
go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo=
go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I=
go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM=
go.starlark.net v0.0.0-20230525235612-a134d8f9ddca h1:VdD38733bfYv5tUZwEIskMM93VanwNIi5bIKnDrJdEY=
go.starlark.net v0.0.0-20230525235612-a134d8f9ddca/go.mod h1:jxU+3+j+71eXOW14274+SmmuW82qJzl6iZSeqEtTGds=
go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A=
Expand Down
Loading