Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add start of elastic-agent diagnostics command #28265

Merged
merged 14 commits into from
Oct 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,7 @@ for a few releases. Please use other tools provided by Elastic to fetch data fro
- Update ECS to 1.12.0. {pull}27770[27770]
- Fields mapped as `match_only_text` will automatically fallback to a `text` mapping when using Elasticsearch versions that do not support `match_only_text`. {pull}27770[27770]
- Update cloud.google.com/go library. {pull}28229[28229]
- Add additional metadata to the root HTTP endpoint. {pull}28265[28265]
- Upgrade k8s.io/client-go library. {pull}28228[28228]

*Auditbeat*
Expand Down
20 changes: 18 additions & 2 deletions libbeat/cmd/instance/beat.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"math/big"
"math/rand"
"os"
"os/user"
"runtime"
"runtime/debug"
"strings"
Expand Down Expand Up @@ -162,7 +163,7 @@ func Run(settings Settings, bt beat.Creator) error {

name := settings.Name
idxPrefix := settings.IndexPrefix
version := settings.Version
michel-laterman marked this conversation as resolved.
Show resolved Hide resolved
agentVersion := settings.Version
elasticLicensed := settings.ElasticLicensed

return handleError(func() error {
Expand All @@ -172,7 +173,7 @@ func Run(settings Settings, bt beat.Creator) error {
"panic", r, zap.Stack("stack"))
}
}()
b, err := NewBeat(name, idxPrefix, version, elasticLicensed)
b, err := NewBeat(name, idxPrefix, agentVersion, elasticLicensed)
if err != nil {
return err
}
Expand All @@ -184,6 +185,20 @@ func Run(settings Settings, bt beat.Creator) error {
monitoring.NewString(registry, "name").Set(b.Info.Name)
monitoring.NewString(registry, "hostname").Set(b.Info.Hostname)

// Add more beat metadata
monitoring.NewString(registry, "binary_arch").Set(runtime.GOARCH)
monitoring.NewString(registry, "build_commit").Set(version.Commit())
monitoring.NewTimestamp(registry, "build_time").Set(version.BuildTime())
monitoring.NewBool(registry, "elastic_licensed").Set(b.Info.ElasticLicensed)

u, err := user.Current()
if err != nil {
return err
}
monitoring.NewString(registry, "username").Set(u.Username)
monitoring.NewString(registry, "uid").Set(u.Uid)
monitoring.NewString(registry, "gid").Set(u.Gid)

// Add additional info to state registry. This is also reported to monitoring
stateRegistry := monitoring.GetNamespace("state").GetRegistry()
serviceRegistry := stateRegistry.NewRegistry("service")
Expand Down Expand Up @@ -414,6 +429,7 @@ func (b *Beat) launch(settings Settings, bt beat.Creator) error {
// Set Beat ID in registry vars, in case it was loaded from meta file
infoRegistry := monitoring.GetNamespace("info").GetRegistry()
monitoring.NewString(infoRegistry, "uuid").Set(b.Info.ID.String())
monitoring.NewString(infoRegistry, "ephemeral_id").Set(b.Info.EphemeralID.String())

serviceRegistry := monitoring.GetNamespace("state").GetRegistry().GetRegistry("service")
monitoring.NewString(serviceRegistry, "id").Set(b.Info.ID.String())
Expand Down
1 change: 1 addition & 0 deletions x-pack/elastic-agent/CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -145,4 +145,5 @@
- Agent now adapts the beats queue size based on output settings. {issue}26638[26638] {pull}27429[27429]
- Support ephemeral containers in Kubernetes dynamic provider. {issue}27020[#27020] {pull}27707[27707]
- Add complete k8s metadata through composable provider. {pull}27691[27691]
- Add diagnostics command to gather beat metadata. {pull}28265[28265]
- Add `KIBANA_FLEET_SERVICE_TOKEN` to Elastic Agent container. {pull}28096[28096]
27 changes: 27 additions & 0 deletions x-pack/elastic-agent/control.proto
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,25 @@ message ApplicationStatus {
string payload = 5;
}

// Current metadata for a running process.
message ProcMeta {
string process = 1;
string name = 2;
string hostname = 3;
string id = 4;
string ephemeral_id = 5;
string version = 6;
string build_commit = 7;
string build_time = 8;
string username = 9;
string user_id = 10;
string user_gid = 11;
string architecture = 12;
string route_key = 13;
bool elastic_licensed = 14;
string error = 15;
}

// Status is the current status of Elastic Agent.
message StatusResponse {
// Overall status of Elastic Agent.
Expand All @@ -104,6 +123,11 @@ message StatusResponse {
repeated ApplicationStatus applications = 3;
}

// ProcMetaResponse is the current running version infomation for all processes.
message ProcMetaResponse {
repeated ProcMeta procs = 1;
}

service ElasticAgentControl {
// Fetches the currently running version of the Elastic Agent.
rpc Version(Empty) returns (VersionResponse);
Expand All @@ -116,4 +140,7 @@ service ElasticAgentControl {

// Upgrade starts the upgrade process of Elastic Agent.
rpc Upgrade(UpgradeRequest) returns (UpgradeResponse);

// Gather all running process metadata.
rpc ProcMeta(Empty) returns (ProcMetaResponse);
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package stream
import (
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/application/pipeline"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/configrequest"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/program"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/core/logger"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/core/state"
)
Expand All @@ -20,6 +21,10 @@ type stater interface {
State() map[string]state.State
}

type specer interface {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably you meant specser. i dont know if it is a real world but it sounds funny :-)
no more naming comments from me

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think specser is a bit too strange, it does more to hide what the interface is doing than specer

Specs() map[string]program.Spec
}

func (b *operatorStream) Close() error {
return b.configHandler.Close()
}
Expand All @@ -32,6 +37,13 @@ func (b *operatorStream) State() map[string]state.State {
return nil
}

func (b *operatorStream) Specs() map[string]program.Spec {
if s, ok := b.configHandler.(specer); ok {
return s.Specs()
}
return nil
}

func (b *operatorStream) Execute(cfg configrequest.Request) error {
return b.configHandler.HandleConfig(cfg)
}
Expand Down
1 change: 1 addition & 0 deletions x-pack/elastic-agent/pkg/agent/cmd/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ func NewCommandWithArgs(args []string, streams *cli.IOStreams) *cobra.Command {
cmd.AddCommand(newWatchCommandWithArgs(args, streams))
cmd.AddCommand(newContainerCommand(args, streams))
cmd.AddCommand(newStatusCommand(args, streams))
cmd.AddCommand(newDiagnosticsCommand(args, streams))

// windows special hidden sub-command (only added on windows)
reexec := newReExecWindowsCommand(args, streams)
Expand Down
134 changes: 134 additions & 0 deletions x-pack/elastic-agent/pkg/agent/cmd/diagnostics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package cmd

import (
"context"
"fmt"
"io"
"os"
"text/tabwriter"
"time"

"github.com/spf13/cobra"

"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/control/client"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/errors"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/cli"
)

var diagOutputs = map[string]outputter{
"human": humanDiagnosticsOutput,
"json": jsonOutput,
"yaml": yamlOutput,
}

func newDiagnosticsCommand(_ []string, streams *cli.IOStreams) *cobra.Command {
cmd := &cobra.Command{
Use: "diagnostics",
Short: "Gather diagnostics information from the elastic-agent and running processes.",
Long: "Gather diagnostics information from the elastic-agent and running processes.",
Run: func(c *cobra.Command, args []string) {
if err := diagnosticCmd(streams, c, args); err != nil {
fmt.Fprintf(streams.Err, "Error: %v\n%s\n", err, troubleshootMessage())
os.Exit(1)
}
},
}

cmd.Flags().String("output", "human", "Output the diagnostics information in either human, json, or yaml (default: human)")

return cmd
}

// DiagnosticsInfo a struct to track all inforation related to diagnostics for the agent.
type DiagnosticsInfo struct {
ProcMeta []client.ProcMeta
AgentVersion client.Version
}

func diagnosticCmd(streams *cli.IOStreams, cmd *cobra.Command, args []string) error {
err := tryContainerLoadPaths()
if err != nil {
return err
}

output, _ := cmd.Flags().GetString("output")
outputFunc, ok := diagOutputs[output]
if !ok {
return fmt.Errorf("unsupported output: %s", output)
}

ctx := handleSignal(context.Background())
innerCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()

diag, err := getDiagnostics(innerCtx)
if err == context.DeadlineExceeded {
return errors.New("timed out after 30 seconds trying to connect to Elastic Agent daemon")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

30s sounds like a long time for a timeout for a local diagnostic commands. I would expect the diagnostic command to be pretty quick or is there some "remote" dependency?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

copied timeout from the status command

} else if err == context.Canceled {
return nil
} else if err != nil {
return fmt.Errorf("failed to communicate with Elastic Agent daemon: %s", err)
}

return outputFunc(streams.Out, diag)
}

func getDiagnostics(ctx context.Context) (DiagnosticsInfo, error) {
daemon := client.New()
diag := DiagnosticsInfo{}
err := daemon.Connect(ctx)
if err != nil {
return DiagnosticsInfo{}, err
}
defer daemon.Disconnect()

bv, err := daemon.ProcMeta(ctx)
if err != nil {
return DiagnosticsInfo{}, err
}
diag.ProcMeta = bv

version, err := daemon.Version(ctx)
if err != nil {
return DiagnosticsInfo{}, err
}
diag.AgentVersion = version

return diag, nil
}

func humanDiagnosticsOutput(w io.Writer, obj interface{}) error {
diag, ok := obj.(DiagnosticsInfo)
if !ok {
return fmt.Errorf("unable to cast %T as DiagnosticsInfo", obj)
}
return outputDiagnostics(w, diag)
}

func outputDiagnostics(w io.Writer, d DiagnosticsInfo) error {
tw := tabwriter.NewWriter(w, 4, 1, 2, ' ', 0)
fmt.Fprintf(tw, "elastic-agent\tversion: %s\n", d.AgentVersion.Version)
fmt.Fprintf(tw, "\tbuild_commit: %s\tbuild_time: %s\tsnapshot_build: %v\n", d.AgentVersion.Commit, d.AgentVersion.BuildTime, d.AgentVersion.Snapshot)
if len(d.ProcMeta) == 0 {
fmt.Fprintf(tw, "Applications: (none)\n")
} else {
fmt.Fprintf(tw, "Applications:\n")
for _, app := range d.ProcMeta {
fmt.Fprintf(tw, " *\tname: %s\troute_key: %s\n", app.Name, app.RouteKey)
if app.Error != "" {
fmt.Fprintf(tw, "\terror: %s\n", app.Error)
} else {
fmt.Fprintf(tw, "\tprocess: %s\tid: %s\tephemeral_id: %s\telastic_license: %v\n", app.Process, app.ID, app.EphemeralID, app.ElasticLicensed)
fmt.Fprintf(tw, "\tversion: %s\tcommit: %s\tbuild_time: %s\tbinary_arch: %v\n", app.Version, app.BuildCommit, app.BuildTime, app.BinaryArchitecture)
fmt.Fprintf(tw, "\thostname: %s\tusername: %s\tuser_id: %s\tuser_gid: %s\n", app.Hostname, app.Username, app.UserID, app.UserGID)
}

}
}
tw.Flush()
return nil
}
74 changes: 74 additions & 0 deletions x-pack/elastic-agent/pkg/agent/cmd/diagnostics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package cmd

import (
"os"
"time"

"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/control/client"
)

var testDiagnostics = DiagnosticsInfo{
AgentVersion: client.Version{
Version: "test-version",
Commit: "test-commit",
BuildTime: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
Snapshot: false,
},
ProcMeta: []client.ProcMeta{client.ProcMeta{
Process: "filebeat",
Name: "filebeat",
Hostname: "test-host",
ID: "filebeat-id",
EphemeralID: "filebeat-ephemeral-id",
Version: "filebeat-version",
BuildCommit: "filebeat-commit",
BuildTime: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
Username: "test-user",
UserID: "1000",
UserGID: "1000",
BinaryArchitecture: "test-architecture",
RouteKey: "test",
ElasticLicensed: true,
}, client.ProcMeta{
Process: "filebeat",
Name: "filebeat_monitoring",
Hostname: "test-host",
ID: "filebeat_monitoring-id",
EphemeralID: "filebeat_monitoring-ephemeral-id",
Version: "filebeat_monitoring-version",
BuildCommit: "filebeat_monitoring-commit",
BuildTime: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
Username: "test-user",
UserID: "1000",
UserGID: "1000",
BinaryArchitecture: "test-architecture",
RouteKey: "test",
ElasticLicensed: true,
}, client.ProcMeta{
Name: "metricbeat",
RouteKey: "test",
Error: "failed to get metricbeat data",
}},
}

func Example_humanDiagnosticsOutput() {
humanDiagnosticsOutput(os.Stdout, testDiagnostics)
// Output:
// elastic-agent version: test-version
// build_commit: test-commit build_time: 2021-01-01 00:00:00 +0000 UTC snapshot_build: false
// Applications:
// * name: filebeat route_key: test
// process: filebeat id: filebeat-id ephemeral_id: filebeat-ephemeral-id elastic_license: true
// version: filebeat-version commit: filebeat-commit build_time: 2021-01-01 00:00:00 +0000 UTC binary_arch: test-architecture
// hostname: test-host username: test-user user_id: 1000 user_gid: 1000
// * name: filebeat_monitoring route_key: test
// process: filebeat id: filebeat_monitoring-id ephemeral_id: filebeat_monitoring-ephemeral-id elastic_license: true
// version: filebeat_monitoring-version commit: filebeat_monitoring-commit build_time: 2021-01-01 00:00:00 +0000 UTC binary_arch: test-architecture
// hostname: test-host username: test-user user_id: 1000 user_gid: 1000
// * name: metricbeat route_key: test
// error: failed to get metricbeat data
}
2 changes: 2 additions & 0 deletions x-pack/elastic-agent/pkg/agent/cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ func run(streams *cli.IOStreams, override cfgOverrider) error {
return err
}

control.SetRouteFn(app.Routes)

serverStopFn, err := setupMetrics(agentInfo, logger, cfg.Settings.DownloadConfig.OS(), cfg.Settings.MonitoringConfig, app)
if err != nil {
return err
Expand Down
Loading