Skip to content

Commit

Permalink
Implement velero debug
Browse files Browse the repository at this point in the history
This PR adds a `velero debug` subcommand, which leverages `crashd` to
collect the logs and specs of the Velero server components and bundle
them into a tarball.

Signed-off-by: Daniel Jiang <jiangd@vmware.com>
  • Loading branch information
reasonerjt committed Aug 9, 2021
1 parent ed5809b commit 4c4e1af
Show file tree
Hide file tree
Showing 7 changed files with 182 additions and 9 deletions.
4 changes: 1 addition & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ _testmain.go
*.test
*.prof

debug

/velero
.idea/

Expand All @@ -49,4 +47,4 @@ tilt-resources/tilt-settings.json
tilt-resources/velero_v1_backupstoragelocation.yaml
tilt-resources/deployment.yaml
tilt-resources/restic.yaml
tilt-resources/cloud
tilt-resources/cloud
1 change: 1 addition & 0 deletions changelogs/unreleased/4022-reasonerjt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implement velero debug
7 changes: 1 addition & 6 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ require (
github.com/gofrs/uuid v3.2.0+incompatible
github.com/golang/protobuf v1.4.2
github.com/google/uuid v1.1.2
github.com/googleapis/gnostic v0.5.2 // indirect
github.com/hashicorp/go-hclog v0.0.0-20180709165350-ff2cf002a8dd
github.com/hashicorp/go-plugin v0.0.0-20190610192547-a1bc61569a26
github.com/joho/godotenv v1.3.0
Expand All @@ -31,20 +30,16 @@ require (
github.com/spf13/cobra v1.1.1
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.5.1
golang.org/x/crypto v0.0.0-20201012173705-84dcc777aaee // indirect
github.com/vmware-tanzu/crash-diagnostics v0.3.3
golang.org/x/mod v0.3.0
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b
golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/grpc v1.31.0
k8s.io/api v0.19.12
k8s.io/apiextensions-apiserver v0.19.12
k8s.io/apimachinery v0.19.12
k8s.io/cli-runtime v0.19.12
k8s.io/client-go v0.19.12
k8s.io/klog v1.0.0
k8s.io/klog/v2 v2.3.0 // indirect
k8s.io/utils v0.0.0-20201005171033-6301aaf42dc7 // indirect
sigs.k8s.io/cluster-api v0.3.11-0.20210106212952-b6c1b5b3db3d
sigs.k8s.io/controller-runtime v0.7.1-0.20201215171748-096b2e07c091
)
Expand Down
11 changes: 11 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas=
github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU=
github.com/go-logr/logr v0.2.1/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU=
github.com/go-logr/logr v0.3.0 h1:q4c+kbcR0d5rSurhBR8dIgieOaYpXtsdTYfx22Cu6rs=
github.com/go-logr/logr v0.3.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU=
github.com/go-logr/zapr v0.2.0 h1:v6Ji8yBW77pva6NkJKQdHLAJKrIJKRHz0RXwPqCHSR4=
Expand Down Expand Up @@ -563,6 +564,10 @@ github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1
github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc=
github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/vektah/gqlparser v1.1.2/go.mod h1:1ycwN7Ij5njmMkPPAOaRFY4rET2Enx7IkVv3vaXspKw=
github.com/vladimirvivien/echo v0.0.1-alpha.6 h1:L1elSMyiiqia7+5ikH24xKIkYAlecRXP6i4YmAF1tkc=
github.com/vladimirvivien/echo v0.0.1-alpha.6/go.mod h1:64h/A7+5GmiBaeztyIr8BVf/07B7knV6OAP06jX+oyE=
github.com/vmware-tanzu/crash-diagnostics v0.3.3 h1:JB2LkIGg2I342jrqcebUk2RPykwnpIItQalBWcn3cpo=
github.com/vmware-tanzu/crash-diagnostics v0.3.3/go.mod h1:OqjOnW7wNTA8SUvh8sAMFewIgojH2Z7bL5I7zFj+i8w=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/xlab/handysort v0.0.0-20150421192137-fb3537ed64a1/go.mod h1:QcJo0QPSfTONNIgpN5RA8prR7fF8nkF6cTWTcNerRO8=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
Expand All @@ -582,6 +587,8 @@ go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.starlark.net v0.0.0-20201006213952-227f4aabceb5 h1:ApvY/1gw+Yiqb/FKeks3KnVPWpkR3xzij82XPKLjJVw=
go.starlark.net v0.0.0-20201006213952-227f4aabceb5/go.mod h1:f0znQkUKRrkk36XxWbGjMqQM8wGv/xHBVE2qc3B5oFU=
go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/atomic v1.6.0 h1:Ezj3JGmsOnG1MoRWQkPBsKLe9DwWD9QeXzTRzzldNVk=
Expand Down Expand Up @@ -686,6 +693,7 @@ golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/
golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201010224723-4f7140c49acb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b h1:uwuIcX0g4Yl1NC5XAz37xsr2lTtcqevgzYNVt49waME=
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
Expand Down Expand Up @@ -753,9 +761,11 @@ golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200814200057-3d37ad5750ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201013132646-2da7054afaeb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201112073958-5cba982894dd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210112080510-489259a85091 h1:DMyOG0U+gKfu8JZzg2UQe9MeaC1X+xQWlAKcRnjxjCw=
golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand Down Expand Up @@ -967,6 +977,7 @@ k8s.io/apimachinery v0.19.12/go.mod h1:9eb44nUQSsz9QZiilFRuMj3ZbTmoWolU8S2gnXoRM
k8s.io/apiserver v0.19.2/go.mod h1:FreAq0bJ2vtZFj9Ago/X0oNGC51GfubKK/ViOKfVAOA=
k8s.io/apiserver v0.19.12 h1:xQjt/jLqdYszJTRTDDnHUnA0S+a1TZ/8rgWr0/xoa6I=
k8s.io/apiserver v0.19.12/go.mod h1:ldZAZTNIKfMMv/UUEhk6UyTXC0/34iRdNFHo+MJOPc4=
k8s.io/cli-runtime v0.19.0/go.mod h1:tun9l0eUklT8IHIM0jors17KmUjcrAxn0myoBYwuNuo=
k8s.io/cli-runtime v0.19.2/go.mod h1:CMynmJM4Yf02TlkbhKxoSzi4Zf518PukJ5xep/NaNeY=
k8s.io/cli-runtime v0.19.12 h1:17snU0NcEnpU6TT5wcnBdBW/ydUQQ/qn82rKuAU5VkY=
k8s.io/cli-runtime v0.19.12/go.mod h1:KopjJ53HaHZjG+WhJmH8WxZzxnXVNkxP7GO1QiQJ2uI=
Expand Down
25 changes: 25 additions & 0 deletions pkg/cmd/cli/debug/cshd-scripts/velero.cshd
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
def capture_backup_logs(namespace):
    """Capture the Backup object and its log for the backup named by args.backup.

    Does nothing when no backup name was passed to the script.
    """
    backup_name = args.backup
    if not backup_name:
        return
    # Dump the Backup object itself from the velero.io API group.
    kube_capture(
        what="objects",
        namespaces=[namespace],
        groups=['velero.io'],
        kinds=['backup'],
        names=[backup_name],
    )
    # Run the local velero CLI and store its output as the backup log.
    capture_local(
        cmd="velero --namespace={} backup logs {}".format(namespace, backup_name),
        file_name="backup_{}.log".format(backup_name),
    )
def capture_restore_logs(namespace):
    """Capture the Restore object and its log for the restore named by args.restore.

    Does nothing when no restore name was passed to the script.
    """
    restore_name = args.restore
    if not restore_name:
        return
    # Dump the Restore object itself from the velero.io API group.
    kube_capture(
        what="objects",
        namespaces=[namespace],
        groups=['velero.io'],
        kinds=['restore'],
        names=[restore_name],
    )
    # Run the local velero CLI and store its output as the restore log.
    capture_local(
        cmd="velero --namespace={} restore logs {}".format(namespace, restore_name),
        file_name="restore_{}.log".format(restore_name),
    )

# Resolve script arguments, falling back to defaults when an argument is empty.
ns = args.namespace if args.namespace else "velero"
basedir = args.basedir if args.basedir else os.home
# A relative default: the bundle is named bundle.tar.gz unless args.output is set.
output = args.output if args.output else "bundle.tar.gz"
# Working dir for writing during script execution
crshd = crashd_config(workdir="{0}/velero-bundle".format(basedir))
# Register the kubeconfig as a default so the kube_capture calls below do not pass it explicitly.
set_defaults(kube_config(path=args.kubeconfig))
# Record the output of `velero version` for the target namespace.
capture_local(cmd="velero version -n {}".format(ns), file_name="version.txt")
# Optional per-backup / per-restore captures; each is a no-op when its name is not set.
capture_backup_logs(ns)
capture_restore_logs(ns)
# Logs from the velero namespace.
kube_capture(what="logs", namespaces=[ns])
# Specs of the listed velero.io resource kinds in the namespace.
kube_capture(what="objects", namespaces=[ns], groups=['velero.io'], kinds=['backupstoragelocation', 'podvolumebackup', 'podvolumerestore'])
# Pack everything written under the working dir into the output tarball.
archive(output_file=output, source_paths=[crshd.workdir])


140 changes: 140 additions & 0 deletions pkg/cmd/cli/debug/debug.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
Copyright 2017 the Velero contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package debug

import (
_ "embed"
"fmt"
"io/ioutil"
"os"
"path/filepath"

"github.com/spf13/cobra"
"github.com/spf13/pflag"
crashdCmd "github.com/vmware-tanzu/crash-diagnostics/cmd"
"k8s.io/client-go/tools/clientcmd"

"github.com/vmware-tanzu/velero/pkg/client"
"github.com/vmware-tanzu/velero/pkg/cmd"
)

// scriptBytes holds the contents of the crashd script embedded at build time;
// runCrashd writes it to a temp file and hands that file to crashd.
//
//go:embed cshd-scripts/velero.cshd
var scriptBytes []byte

// option collects the settings of the `velero debug` command. Its fields are
// serialized by asCrashdArgs and read by the embedded crashd script as args.*.
type option struct {
	// the temp dir created by complete; the embedded script sets crashd's workdir to $baseDir/velero-bundle
	baseDir string
	// the namespace where velero server is installed
	namespace string
	// the absolute path for the log bundle to be generated; stays empty when the
	// --output flag is not set, in which case the script picks its own default
	outputPath string
	// the absolute path for the kubeconfig file that will be read by crashd for calling K8S API
	kubeconfigPath string
	// optional, the name of the backup resource whose log will be packaged into the debug bundle
	backup string
	// optional, the name of the restore resource whose log will be packaged into the debug bundle
	restore string
}

// bindFlags registers the command's own flags (--output, --backup, --restore)
// on flags and binds them to the corresponding option fields. All three are
// optional and default to the empty string.
func (o *option) bindFlags(flags *pflag.FlagSet) {
	// When --output is empty the embedded script falls back to the relative
	// name "bundle.tar.gz", so the help text points at the working directory
	// rather than $HOME.
	flags.StringVar(&o.outputPath, "output", "", "The path of the bundle tarball, by default it's ./bundle.tar.gz in the current working directory. Optional")
	flags.StringVar(&o.backup, "backup", "", "The name of the backup resource whose log will be collected, no backup logs will be collected if it's not set. Optional")
	flags.StringVar(&o.restore, "restore", "", "The name of the restore resource whose log will be collected, no restore logs will be collected if it's not set. Optional")
}

// asCrashdArgs renders the options as the comma-separated key=value list that
// is passed to crashd via --args. Values are inserted verbatim, without any
// quoting or escaping.
func (o *option) asCrashdArgs() string {
	const layout = "output=%s,namespace=%s,basedir=%s,backup=%s,restore=%s,kubeconfig=%s"
	return fmt.Sprintf(
		layout,
		o.outputPath,
		o.namespace,
		o.baseDir,
		o.backup,
		o.restore,
		o.kubeconfigPath,
	)
}

// complete fills in the fields that are not bound directly to flags:
//   - outputPath is made absolute when it was set via --output;
//   - namespace is taken from the client factory;
//   - kubeconfigPath is the absolute form of the path returned by kubeconfig(fs);
//   - baseDir is a freshly created temp dir.
//
// The temp dir is created last, after every other fallible step, so that a
// failure here never leaves a directory behind for the caller to clean up.
// On success the caller owns o.baseDir and is responsible for removing it.
func (o *option) complete(f client.Factory, fs *pflag.FlagSet) error {
	if len(o.outputPath) > 0 {
		absOutputPath, err := filepath.Abs(o.outputPath)
		if err != nil {
			return fmt.Errorf("invalid output path: %w", err)
		}
		o.outputPath = absOutputPath
	}
	o.namespace = f.Namespace()

	kp := kubeconfig(fs)
	absKubeconfigPath, err := filepath.Abs(kp)
	if err != nil {
		return fmt.Errorf("invalid kubeconfig path: %s, %w", kp, err)
	}
	o.kubeconfigPath = absKubeconfigPath

	tmpDir, err := ioutil.TempDir("", "crashd")
	if err != nil {
		return fmt.Errorf("creating temp dir for crashd: %w", err)
	}
	o.baseDir = tmpDir
	return nil
}

// NewCommand creates the `velero debug` cobra command. When run, the command
// completes its options from the flags and the client factory f, runs the
// embedded crashd script with those options, removes the temp dir it created,
// and finally reports any error through cmd.CheckError.
func NewCommand(f client.Factory) *cobra.Command {
	o := &option{}
	c := &cobra.Command{
		Use:   "debug",
		Short: "Generate debug bundle",
		Long: `Generate a tarball containing the logs of velero deployment, plugin logs, restic DaemonSet,
specs of BackupStorageLocations, PodVolumeBackups, PodVolumeRestores, and optionally the specs and logs of backup and restore.`,
		Run: func(c *cobra.Command, args []string) {
			// Only run crashd when the options were completed successfully.
			err := o.complete(f, c.Flags())
			if err == nil {
				err = runCrashd(o.asCrashdArgs())
			}
			// Clean up explicitly instead of via defer, and before reporting
			// the error: deferred functions do not run if cmd.CheckError
			// terminates the process on a non-nil error.
			// NOTE(review): CheckError is presumed to exit — confirm.
			if len(o.baseDir) > 0 {
				if rmErr := os.RemoveAll(o.baseDir); rmErr != nil {
					fmt.Fprintf(os.Stderr, "Failed to remove temp dir: %s: %v\n", o.baseDir, rmErr)
				}
			}
			cmd.CheckError(err)
		},
	}
	o.bindFlags(c.Flags())
	return c
}

// runCrashd writes the embedded crashd script to a temp file and runs crashd
// on it in-process, passing argString as the value of crashd's --args flag.
//
// crashd is driven through os.Args, so the process arguments are replaced for
// the duration of the call and restored afterwards. The temp script file is
// removed before returning; a removal failure is only reported on stderr.
func runCrashd(argString string) error {
	script, err := ioutil.TempFile("", "velero*.cshd")
	if err != nil {
		return err
	}
	defer func() {
		if err := os.Remove(script.Name()); err != nil {
			fmt.Fprintf(os.Stderr, "Failed to remove the temp file: %s, %v\n", script.Name(), err)
		}
	}()

	// Close the file before crashd reads it: this releases the descriptor and
	// avoids removing a file that is still open. Close is attempted even when
	// the write failed; the write error takes precedence.
	_, writeErr := script.Write(scriptBytes)
	closeErr := script.Close()
	if writeErr != nil {
		return writeErr
	}
	if closeErr != nil {
		return closeErr
	}

	savedArgs := os.Args
	defer func() { os.Args = savedArgs }()
	os.Args = []string{"", "run", "--debug", script.Name(), "--args", argString}
	return crashdCmd.Run()
}

// kubeconfig returns the kubeconfig file name to hand to crashd. A non-empty
// "kubeconfig" flag value in fs is set as the explicit path on client-go's
// default path options; the result is whatever those options report as the
// default file name.
func kubeconfig(fs *pflag.FlagSet) string {
	opts := clientcmd.NewDefaultPathOptions()
	// The lookup error is deliberately ignored: on failure the value is empty
	// and the default loading rules are left untouched.
	if explicit, _ := fs.GetString("kubeconfig"); explicit != "" {
		opts.LoadingRules.ExplicitPath = explicit
	}
	return opts.GetDefaultFilename()
}
3 changes: 3 additions & 0 deletions pkg/cmd/velero/velero.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ import (
"github.com/spf13/cobra"
"k8s.io/klog"

"github.com/vmware-tanzu/velero/pkg/cmd/cli/debug"

"github.com/vmware-tanzu/velero/pkg/client"
"github.com/vmware-tanzu/velero/pkg/cmd/cli/backup"
"github.com/vmware-tanzu/velero/pkg/cmd/cli/backuplocation"
Expand Down Expand Up @@ -116,6 +118,7 @@ operations can also be performed as 'velero backup get' and 'velero schedule cre
bug.NewCommand(),
backuplocation.NewCommand(f),
snapshotlocation.NewCommand(f),
debug.NewCommand(f),
)

// init and add the klog flags
Expand Down

0 comments on commit 4c4e1af

Please sign in to comment.