Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add start of elastic-agent diagnostics command #28265

Merged
merged 14 commits into from
Oct 15, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- Update ECS to 1.12.0. {pull}27770[27770]
- Fields mapped as `match_only_text` will automatically fallback to a `text` mapping when using Elasticsearch versions that do not support `match_only_text`. {pull}27770[27770]
- Update cloud.google.com/go library. {pull}28229[28229]
- Add additional metadata to the root HTTP endpoint. {pull}28265[28265]
- Upgrade k8s.io/client-go library. {pull}28228[28228]

*Auditbeat*
Expand Down
20 changes: 18 additions & 2 deletions libbeat/cmd/instance/beat.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"math/big"
"math/rand"
"os"
"os/user"
"runtime"
"runtime/debug"
"strings"
Expand Down Expand Up @@ -162,7 +163,7 @@ func Run(settings Settings, bt beat.Creator) error {

name := settings.Name
idxPrefix := settings.IndexPrefix
version := settings.Version
michel-laterman marked this conversation as resolved.
Show resolved Hide resolved
agentVersion := settings.Version
elasticLicensed := settings.ElasticLicensed

return handleError(func() error {
Expand All @@ -172,7 +173,7 @@ func Run(settings Settings, bt beat.Creator) error {
"panic", r, zap.Stack("stack"))
}
}()
b, err := NewBeat(name, idxPrefix, version, elasticLicensed)
b, err := NewBeat(name, idxPrefix, agentVersion, elasticLicensed)
if err != nil {
return err
}
Expand All @@ -184,6 +185,20 @@ func Run(settings Settings, bt beat.Creator) error {
monitoring.NewString(registry, "name").Set(b.Info.Name)
monitoring.NewString(registry, "hostname").Set(b.Info.Hostname)

// Add more beat metadata
monitoring.NewString(registry, "binary_arch").Set(runtime.GOARCH)
monitoring.NewString(registry, "build_commit").Set(version.Commit())
monitoring.NewTimestamp(registry, "build_time").Set(version.BuildTime())
monitoring.NewBool(registry, "elastic_licensed").Set(b.Info.ElasticLicensed)

u, err := user.Current()
if err != nil {
return err
}
monitoring.NewString(registry, "username").Set(u.Username)
monitoring.NewString(registry, "uid").Set(u.Uid)
monitoring.NewString(registry, "gid").Set(u.Gid)

// Add additional info to state registry. This is also reported to monitoring
stateRegistry := monitoring.GetNamespace("state").GetRegistry()
serviceRegistry := stateRegistry.NewRegistry("service")
Expand Down Expand Up @@ -414,6 +429,7 @@ func (b *Beat) launch(settings Settings, bt beat.Creator) error {
// Set Beat ID in registry vars, in case it was loaded from meta file
infoRegistry := monitoring.GetNamespace("info").GetRegistry()
monitoring.NewString(infoRegistry, "uuid").Set(b.Info.ID.String())
monitoring.NewString(infoRegistry, "ephemeral_id").Set(b.Info.EphemeralID.String())

serviceRegistry := monitoring.GetNamespace("state").GetRegistry().GetRegistry("service")
monitoring.NewString(serviceRegistry, "id").Set(b.Info.ID.String())
Expand Down
1 change: 1 addition & 0 deletions x-pack/elastic-agent/CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,4 @@
- Agent now adapts the beats queue size based on output settings. {issue}26638[26638] {pull}27429[27429]
- Support ephemeral containers in Kubernetes dynamic provider. {issue}27020[#27020] {pull}27707[27707]
- Add complete k8s metadata through composable provider. {pull}27691[27691]
- Add diagnostics command to gather beat metadata. {pull}28265[28265]
27 changes: 27 additions & 0 deletions x-pack/elastic-agent/control.proto
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,25 @@ message ApplicationStatus {
string payload = 5;
}

// Current metadata for a running beat.
message BeatMeta {
string beat = 1;
string name = 2;
string hostname = 3;
string id = 4;
string ephemeral_id = 5;
string version = 6;
string build_commit = 7;
string build_time = 8;
string username = 9;
string user_id = 10;
string user_gid = 11;
string architecture = 12;
string route_key = 13;
bool elastic_licensed = 14;
string error = 15;
}

// Status is the current status of Elastic Agent.
message StatusResponse {
// Overall status of Elastic Agent.
Expand All @@ -104,6 +123,11 @@ message StatusResponse {
repeated ApplicationStatus applications = 3;
}

// BeatMetaResponse is the current running version infomation for all Beats.
message BeatMetaResponse {
repeated BeatMeta beats = 1;
}

service ElasticAgentControl {
// Fetches the currently running version of the Elastic Agent.
rpc Version(Empty) returns (VersionResponse);
Expand All @@ -116,4 +140,7 @@ service ElasticAgentControl {

// Upgrade starts the upgrade process of Elastic Agent.
rpc Upgrade(UpgradeRequest) returns (UpgradeResponse);

// Gather all running beat metadata.
rpc BeatMeta(Empty) returns (BeatMetaResponse);
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package stream
import (
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/application/pipeline"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/configrequest"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/program"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/core/logger"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/core/state"
)
Expand All @@ -20,6 +21,10 @@ type stater interface {
State() map[string]state.State
}

type specer interface {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably you meant specser. i dont know if it is a real world but it sounds funny :-)
no more naming comments from me

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think specser is a bit too strange, it does more to hide what the interface is doing than specer

Specs() map[string]program.Spec
}

func (b *operatorStream) Close() error {
return b.configHandler.Close()
}
Expand All @@ -32,6 +37,13 @@ func (b *operatorStream) State() map[string]state.State {
return nil
}

func (b *operatorStream) Specs() map[string]program.Spec {
if s, ok := b.configHandler.(specer); ok {
return s.Specs()
}
return nil
}

func (b *operatorStream) Execute(cfg configrequest.Request) error {
return b.configHandler.HandleConfig(cfg)
}
Expand Down
1 change: 1 addition & 0 deletions x-pack/elastic-agent/pkg/agent/cmd/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ func NewCommandWithArgs(args []string, streams *cli.IOStreams) *cobra.Command {
cmd.AddCommand(newWatchCommandWithArgs(args, streams))
cmd.AddCommand(newContainerCommand(args, streams))
cmd.AddCommand(newStatusCommand(args, streams))
cmd.AddCommand(newDiagnosticsCommand(args, streams))

// windows special hidden sub-command (only added on windows)
reexec := newReExecWindowsCommand(args, streams)
Expand Down
87 changes: 87 additions & 0 deletions x-pack/elastic-agent/pkg/agent/cmd/diagnostics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package cmd

import (
"context"
"fmt"
"os"
"time"

"github.com/spf13/cobra"

"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/control/client"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/agent/errors"
"github.com/elastic/beats/v7/x-pack/elastic-agent/pkg/cli"
)

func newDiagnosticsCommand(_ []string, streams *cli.IOStreams) *cobra.Command {
cmd := &cobra.Command{
Use: "diagnostics",
Short: "Diagnostics gather diagnostics information from the elastic-agent and running processes.",
Long: "Diagnostics gather diagnostics information from the elastic-agent and running processes.",
Run: func(c *cobra.Command, args []string) {
if err := diagnosticCmd(streams, c, args); err != nil {
fmt.Fprintf(streams.Err, "Error: %v\n%s\n", err, troubleshootMessage())
os.Exit(1)
}
},
}

return cmd
}

// DiagnosticsInfo a struct to track all inforation related to diagnostics for the agent.
type DiagnosticsInfo struct {
BeatMeta []client.BeatMeta
AgentVersion client.Version
}

func diagnosticCmd(streams *cli.IOStreams, cmd *cobra.Command, args []string) error {
err := tryContainerLoadPaths()
if err != nil {
return err
}

ctx := handleSignal(context.Background())
innerCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()

diag, err := getDiagnostics(innerCtx)
if err == context.DeadlineExceeded {
return errors.New("timed out after 30 seconds trying to connect to Elastic Agent daemon")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

30s sounds like a long time for a timeout for a local diagnostic commands. I would expect the diagnostic command to be pretty quick or is there some "remote" dependency?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

copied timeout from the status command

} else if err == context.Canceled {
return nil
} else if err != nil {
return fmt.Errorf("failed to communicate with Elastic Agent daemon: %s", err)
}

fmt.Fprintf(streams.Out, "%+v\n", diag)
return nil
}

func getDiagnostics(ctx context.Context) (DiagnosticsInfo, error) {
daemon := client.New()
diag := DiagnosticsInfo{}
err := daemon.Connect(ctx)
if err != nil {
return DiagnosticsInfo{}, err
}
defer daemon.Disconnect()

bv, err := daemon.BeatMeta(ctx)
if err != nil {
return DiagnosticsInfo{}, err
}
diag.BeatMeta = bv

version, err := daemon.Version(ctx)
if err != nil {
return DiagnosticsInfo{}, err
}
diag.AgentVersion = version

return diag, nil
}
2 changes: 2 additions & 0 deletions x-pack/elastic-agent/pkg/agent/cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ func run(streams *cli.IOStreams, override cfgOverrider) error {
return err
}

control.SetRouteFn(app.Routes)

serverStopFn, err := setupMetrics(agentInfo, logger, cfg.Settings.DownloadConfig.OS(), cfg.Settings.MonitoringConfig, app)
if err != nil {
return err
Expand Down
63 changes: 63 additions & 0 deletions x-pack/elastic-agent/pkg/agent/control/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,25 @@ type ApplicationStatus struct {
Payload map[string]interface{}
}

// BeatMeta is the running version and ID inforation for a running beat.
type BeatMeta struct {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about endpoint and other processes we run? If possible we should keep the naming not specific to Beats only.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good point, Do we know the kind of meta-data endpoint or another process would return? Or should I change this to just be a generic map[string]interface{} to capture everything?

Beat string
Name string
Hostname string
ID string
EphemeralID string
Version string
BuildCommit string
BuildTime time.Time
Username string
UserID string
UserGID string
BinaryArchitecture string
RouteKey string
ElasticLicensed bool
Error string
}

// AgentStatus is the current status of the Elastic Agent.
type AgentStatus struct {
Status Status
Expand All @@ -74,6 +93,8 @@ type Client interface {
Restart(ctx context.Context) error
// Upgrade triggers upgrade of the current running daemon.
Upgrade(ctx context.Context, version string, sourceURI string) (string, error)
// BeatMeta gathers running beat meta-data.
BeatMeta(ctx context.Context) ([]BeatMeta, error)
}

// client manages the state and communication to the Elastic Agent.
Expand Down Expand Up @@ -184,3 +205,45 @@ func (c *client) Upgrade(ctx context.Context, version string, sourceURI string)
}
return res.Version, nil
}

// BeatMeta gathers running beat metadata.
func (c *client) BeatMeta(ctx context.Context) ([]BeatMeta, error) {
bv, err := c.client.BeatMeta(ctx, &proto.Empty{})
if err != nil {
return nil, err
}
beatMeta := []BeatMeta{}

for _, bm := range bv.Beats {
meta := BeatMeta{
Beat: bm.Beat,
Name: bm.Name,
Hostname: bm.Hostname,
ID: bm.Id,
EphemeralID: bm.EphemeralId,
Version: bm.Version,
BuildCommit: bm.BuildCommit,
Username: bm.Username,
UserID: bm.UserId,
UserGID: bm.UserGid,
BinaryArchitecture: bm.Architecture,
RouteKey: bm.RouteKey,
ElasticLicensed: bm.ElasticLicensed,
Error: bm.Error,
}
if bm.BuildTime != "" {
ts, err := time.Parse(time.RFC3339, bm.BuildTime)
if err != nil {
if meta.Error != "" {
meta.Error += ", " + err.Error()
} else {
meta.Error = err.Error()
}
} else {
meta.BuildTime = ts
}
}
beatMeta = append(beatMeta, meta)
}
return beatMeta, nil
}
Loading