Skip to content

Commit

Permalink
Add start of elastic-agent diagnostics command (#28265)
Browse files Browse the repository at this point in the history
This PR starts the elastic-agent diagnostics command.
The beats info ("/") HTTP endpoint has been changed to add more data about the running beat including git commit and ephemeral ID.
Currently the diagnostics command will gather beats metadata information from the endpoint and display them along with agent version information.

(cherry picked from commit 887e40a)
  • Loading branch information
michel-laterman authored and mergify-bot committed Oct 15, 2021
1 parent 9ac3d75 commit b641d05
Show file tree
Hide file tree
Showing 18 changed files with 917 additions and 81 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- Add default seccomp policy for linux arm64. {pull}27955[27955]
- Add cluster level add_kubernetes_metadata support for centralized enrichment {pull}24621[24621]
- Update cloud.google.com/go library. {pull}28229[28229]
- Add additional metadata to the root HTTP endpoint. {pull}28265[28265]
- Upgrade k8s.io/client-go library. {pull}28228[28228]
- Update ECS to 1.12.0. {pull}27770[27770]
- Fields mapped as `match_only_text` will automatically fallback to a `text` mapping when using Elasticsearch versions that do not support `match_only_text`. {pull}27770[27770]
Expand Down
20 changes: 18 additions & 2 deletions libbeat/cmd/instance/beat.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"math/big"
"math/rand"
"os"
"os/user"
"runtime"
"runtime/debug"
"strings"
Expand Down Expand Up @@ -162,7 +163,7 @@ func Run(settings Settings, bt beat.Creator) error {

name := settings.Name
idxPrefix := settings.IndexPrefix
version := settings.Version
agentVersion := settings.Version
elasticLicensed := settings.ElasticLicensed

return handleError(func() error {
Expand All @@ -172,7 +173,7 @@ func Run(settings Settings, bt beat.Creator) error {
"panic", r, zap.Stack("stack"))
}
}()
b, err := NewBeat(name, idxPrefix, version, elasticLicensed)
b, err := NewBeat(name, idxPrefix, agentVersion, elasticLicensed)
if err != nil {
return err
}
Expand All @@ -184,6 +185,20 @@ func Run(settings Settings, bt beat.Creator) error {
monitoring.NewString(registry, "name").Set(b.Info.Name)
monitoring.NewString(registry, "hostname").Set(b.Info.Hostname)

// Add more beat metadata
monitoring.NewString(registry, "binary_arch").Set(runtime.GOARCH)
monitoring.NewString(registry, "build_commit").Set(version.Commit())
monitoring.NewTimestamp(registry, "build_time").Set(version.BuildTime())
monitoring.NewBool(registry, "elastic_licensed").Set(b.Info.ElasticLicensed)

u, err := user.Current()
if err != nil {
return err
}
monitoring.NewString(registry, "username").Set(u.Username)
monitoring.NewString(registry, "uid").Set(u.Uid)
monitoring.NewString(registry, "gid").Set(u.Gid)

// Add additional info to state registry. This is also reported to monitoring
stateRegistry := monitoring.GetNamespace("state").GetRegistry()
serviceRegistry := stateRegistry.NewRegistry("service")
Expand Down Expand Up @@ -414,6 +429,7 @@ func (b *Beat) launch(settings Settings, bt beat.Creator) error {
// Set Beat ID in registry vars, in case it was loaded from meta file
infoRegistry := monitoring.GetNamespace("info").GetRegistry()
monitoring.NewString(infoRegistry, "uuid").Set(b.Info.ID.String())
monitoring.NewString(infoRegistry, "ephemeral_id").Set(b.Info.EphemeralID.String())

serviceRegistry := monitoring.GetNamespace("state").GetRegistry().GetRegistry("service")
monitoring.NewString(serviceRegistry, "id").Set(b.Info.ID.String())
Expand Down
1 change: 1 addition & 0 deletions x-pack/elastic-agent/CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,5 @@
- Agent now adapts the beats queue size based on output settings. {issue}26638[26638] {pull}27429[27429]
- Support ephemeral containers in Kubernetes dynamic provider. {issue}27020[#27020] {pull}27707[27707]
- Add complete k8s metadata through composable provider. {pull}27691[27691]
- Add diagnostics command to gather beat metadata. {pull}28265[28265]
- Add `KIBANA_FLEET_SERVICE_TOKEN` to Elastic Agent container. {pull}28096[28096]
27 changes: 27 additions & 0 deletions x-pack/elastic-agent/control.proto
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,25 @@ message ApplicationStatus {
string payload = 5;
}

// Current metadata for a running process.
message ProcMeta {
string process = 1;
string name = 2;
string hostname = 3;
string id = 4;
string ephemeral_id = 5;
string version = 6;
string build_commit = 7;
string build_time = 8;
string username = 9;
string user_id = 10;
string user_gid = 11;
string architecture = 12;
string route_key = 13;
bool elastic_licensed = 14;
string error = 15;
}

// Status is the current status of Elastic Agent.
message StatusResponse {
// Overall status of Elastic Agent.
Expand All @@ -104,6 +123,11 @@ message StatusResponse {
repeated ApplicationStatus applications = 3;
}

// ProcMetaResponse is the current running version infomation for all processes.
message ProcMetaResponse {
repeated ProcMeta procs = 1;
}

service ElasticAgentControl {
// Fetches the currently running version of the Elastic Agent.
rpc Version(Empty) returns (VersionResponse);
Expand All @@ -116,4 +140,7 @@ service ElasticAgentControl {

// Upgrade starts the upgrade process of Elastic Agent.
rpc Upgrade(UpgradeRequest) returns (UpgradeResponse);

// Gather all running process metadata.
rpc ProcMeta(Empty) returns (ProcMetaResponse);
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package stream
import (
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/application/pipeline"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/configrequest"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/program"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/core/logger"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/core/state"
)
Expand All @@ -20,6 +21,10 @@ type stater interface {
State() map[string]state.State
}

type specer interface {
Specs() map[string]program.Spec
}

func (b *operatorStream) Close() error {
return b.configHandler.Close()
}
Expand All @@ -32,6 +37,13 @@ func (b *operatorStream) State() map[string]state.State {
return nil
}

func (b *operatorStream) Specs() map[string]program.Spec {
if s, ok := b.configHandler.(specer); ok {
return s.Specs()
}
return nil
}

func (b *operatorStream) Execute(cfg configrequest.Request) error {
return b.configHandler.HandleConfig(cfg)
}
Expand Down
1 change: 1 addition & 0 deletions x-pack/elastic-agent/pkg/agent/cmd/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ func NewCommandWithArgs(args []string, streams *cli.IOStreams) *cobra.Command {
cmd.AddCommand(newWatchCommandWithArgs(args, streams))
cmd.AddCommand(newContainerCommand(args, streams))
cmd.AddCommand(newStatusCommand(args, streams))
cmd.AddCommand(newDiagnosticsCommand(args, streams))

// windows special hidden sub-command (only added on windows)
reexec := newReExecWindowsCommand(args, streams)
Expand Down
134 changes: 134 additions & 0 deletions x-pack/elastic-agent/pkg/agent/cmd/diagnostics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package cmd

import (
"context"
"fmt"
"io"
"os"
"text/tabwriter"
"time"

"github.com/spf13/cobra"

"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/control/client"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/errors"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/cli"
)

var diagOutputs = map[string]outputter{
"human": humanDiagnosticsOutput,
"json": jsonOutput,
"yaml": yamlOutput,
}

func newDiagnosticsCommand(_ []string, streams *cli.IOStreams) *cobra.Command {
cmd := &cobra.Command{
Use: "diagnostics",
Short: "Gather diagnostics information from the elastic-agent and running processes.",
Long: "Gather diagnostics information from the elastic-agent and running processes.",
Run: func(c *cobra.Command, args []string) {
if err := diagnosticCmd(streams, c, args); err != nil {
fmt.Fprintf(streams.Err, "Error: %v\n%s\n", err, troubleshootMessage())
os.Exit(1)
}
},
}

cmd.Flags().String("output", "human", "Output the diagnostics information in either human, json, or yaml (default: human)")

return cmd
}

// DiagnosticsInfo a struct to track all inforation related to diagnostics for the agent.
type DiagnosticsInfo struct {
ProcMeta []client.ProcMeta
AgentVersion client.Version
}

func diagnosticCmd(streams *cli.IOStreams, cmd *cobra.Command, args []string) error {
err := tryContainerLoadPaths()
if err != nil {
return err
}

output, _ := cmd.Flags().GetString("output")
outputFunc, ok := diagOutputs[output]
if !ok {
return fmt.Errorf("unsupported output: %s", output)
}

ctx := handleSignal(context.Background())
innerCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()

diag, err := getDiagnostics(innerCtx)
if err == context.DeadlineExceeded {
return errors.New("timed out after 30 seconds trying to connect to Elastic Agent daemon")
} else if err == context.Canceled {
return nil
} else if err != nil {
return fmt.Errorf("failed to communicate with Elastic Agent daemon: %s", err)
}

return outputFunc(streams.Out, diag)
}

func getDiagnostics(ctx context.Context) (DiagnosticsInfo, error) {
daemon := client.New()
diag := DiagnosticsInfo{}
err := daemon.Connect(ctx)
if err != nil {
return DiagnosticsInfo{}, err
}
defer daemon.Disconnect()

bv, err := daemon.ProcMeta(ctx)
if err != nil {
return DiagnosticsInfo{}, err
}
diag.ProcMeta = bv

version, err := daemon.Version(ctx)
if err != nil {
return DiagnosticsInfo{}, err
}
diag.AgentVersion = version

return diag, nil
}

func humanDiagnosticsOutput(w io.Writer, obj interface{}) error {
diag, ok := obj.(DiagnosticsInfo)
if !ok {
return fmt.Errorf("unable to cast %T as DiagnosticsInfo", obj)
}
return outputDiagnostics(w, diag)
}

func outputDiagnostics(w io.Writer, d DiagnosticsInfo) error {
tw := tabwriter.NewWriter(w, 4, 1, 2, ' ', 0)
fmt.Fprintf(tw, "elastic-agent\tversion: %s\n", d.AgentVersion.Version)
fmt.Fprintf(tw, "\tbuild_commit: %s\tbuild_time: %s\tsnapshot_build: %v\n", d.AgentVersion.Commit, d.AgentVersion.BuildTime, d.AgentVersion.Snapshot)
if len(d.ProcMeta) == 0 {
fmt.Fprintf(tw, "Applications: (none)\n")
} else {
fmt.Fprintf(tw, "Applications:\n")
for _, app := range d.ProcMeta {
fmt.Fprintf(tw, " *\tname: %s\troute_key: %s\n", app.Name, app.RouteKey)
if app.Error != "" {
fmt.Fprintf(tw, "\terror: %s\n", app.Error)
} else {
fmt.Fprintf(tw, "\tprocess: %s\tid: %s\tephemeral_id: %s\telastic_license: %v\n", app.Process, app.ID, app.EphemeralID, app.ElasticLicensed)
fmt.Fprintf(tw, "\tversion: %s\tcommit: %s\tbuild_time: %s\tbinary_arch: %v\n", app.Version, app.BuildCommit, app.BuildTime, app.BinaryArchitecture)
fmt.Fprintf(tw, "\thostname: %s\tusername: %s\tuser_id: %s\tuser_gid: %s\n", app.Hostname, app.Username, app.UserID, app.UserGID)
}

}
}
tw.Flush()
return nil
}
74 changes: 74 additions & 0 deletions x-pack/elastic-agent/pkg/agent/cmd/diagnostics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package cmd

import (
"os"
"time"

"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/control/client"
)

var testDiagnostics = DiagnosticsInfo{
AgentVersion: client.Version{
Version: "test-version",
Commit: "test-commit",
BuildTime: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
Snapshot: false,
},
ProcMeta: []client.ProcMeta{client.ProcMeta{
Process: "filebeat",
Name: "filebeat",
Hostname: "test-host",
ID: "filebeat-id",
EphemeralID: "filebeat-ephemeral-id",
Version: "filebeat-version",
BuildCommit: "filebeat-commit",
BuildTime: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
Username: "test-user",
UserID: "1000",
UserGID: "1000",
BinaryArchitecture: "test-architecture",
RouteKey: "test",
ElasticLicensed: true,
}, client.ProcMeta{
Process: "filebeat",
Name: "filebeat_monitoring",
Hostname: "test-host",
ID: "filebeat_monitoring-id",
EphemeralID: "filebeat_monitoring-ephemeral-id",
Version: "filebeat_monitoring-version",
BuildCommit: "filebeat_monitoring-commit",
BuildTime: time.Date(2021, 1, 1, 0, 0, 0, 0, time.UTC),
Username: "test-user",
UserID: "1000",
UserGID: "1000",
BinaryArchitecture: "test-architecture",
RouteKey: "test",
ElasticLicensed: true,
}, client.ProcMeta{
Name: "metricbeat",
RouteKey: "test",
Error: "failed to get metricbeat data",
}},
}

func Example_humanDiagnosticsOutput() {
humanDiagnosticsOutput(os.Stdout, testDiagnostics)
// Output:
// elastic-agent version: test-version
// build_commit: test-commit build_time: 2021-01-01 00:00:00 +0000 UTC snapshot_build: false
// Applications:
// * name: filebeat route_key: test
// process: filebeat id: filebeat-id ephemeral_id: filebeat-ephemeral-id elastic_license: true
// version: filebeat-version commit: filebeat-commit build_time: 2021-01-01 00:00:00 +0000 UTC binary_arch: test-architecture
// hostname: test-host username: test-user user_id: 1000 user_gid: 1000
// * name: filebeat_monitoring route_key: test
// process: filebeat id: filebeat_monitoring-id ephemeral_id: filebeat_monitoring-ephemeral-id elastic_license: true
// version: filebeat_monitoring-version commit: filebeat_monitoring-commit build_time: 2021-01-01 00:00:00 +0000 UTC binary_arch: test-architecture
// hostname: test-host username: test-user user_id: 1000 user_gid: 1000
// * name: metricbeat route_key: test
// error: failed to get metricbeat data
}
2 changes: 2 additions & 0 deletions x-pack/elastic-agent/pkg/agent/cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ func run(streams *cli.IOStreams, override cfgOverrider) error {
return err
}

control.SetRouteFn(app.Routes)

serverStopFn, err := setupMetrics(agentInfo, logger, cfg.Settings.DownloadConfig.OS(), cfg.Settings.MonitoringConfig, app)
if err != nil {
return err
Expand Down
Loading

0 comments on commit b641d05

Please sign in to comment.