From c113a26fe016b3ede8213f6c9d59c52cf83a8e88 Mon Sep 17 00:00:00 2001 From: Anderson Queiroz Date: Fri, 6 Sep 2024 14:35:38 +0200 Subject: [PATCH] fix state store SetAction panic (#5438) * fix state store SetAction panic The state store SetAction did not correctly cover the case where a nil action is passed as parameter. The unenroll handler might pass a nil action if the unenroll is a auto-unenroll, that means, it does not come from fleet Also the unenroll handler does checks for a nil state store anymore, it assumes it's valid just as it does for all other dependencies. --- ...23126-fix-state-store-SetAction-panic.yaml | 32 +++++++ .../handlers/handler_action_unenroll.go | 10 +-- .../handlers/handler_action_unenroll_test.go | 29 +++++- .../pkg/agent/storage/store/state_store.go | 13 +++ .../agent/storage/store/state_store_test.go | 90 ++++++++++++++++++- 5 files changed, 162 insertions(+), 12 deletions(-) create mode 100644 changelog/fragments/1725523126-fix-state-store-SetAction-panic.yaml diff --git a/changelog/fragments/1725523126-fix-state-store-SetAction-panic.yaml b/changelog/fragments/1725523126-fix-state-store-SetAction-panic.yaml new file mode 100644 index 00000000000..3071af824f9 --- /dev/null +++ b/changelog/fragments/1725523126-fix-state-store-SetAction-panic.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: bug-fix + +# Change summary; a 80ish characters long description of the change. +summary: Fix the Elastic Agent crashing when self unenrolling due to too many authentication failures against Fleet Server + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; a word indicating the component this changeset affects. +component: state-store + +# PR URL; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +#pr: https://github.com/owner/repo/1234 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +issue: https://github.com/elastic/elastic-agent/issues/5434 diff --git a/internal/pkg/agent/application/actions/handlers/handler_action_unenroll.go b/internal/pkg/agent/application/actions/handlers/handler_action_unenroll.go index 4b4c9c4dfdb..c6b03f1a03a 100644 --- a/internal/pkg/agent/application/actions/handlers/handler_action_unenroll.go +++ b/internal/pkg/agent/application/actions/handlers/handler_action_unenroll.go @@ -92,12 +92,10 @@ func (h *Unenroll) Handle(ctx context.Context, a fleetapi.Action, acker acker.Ac unenrollPolicy := newPolicyChange(ctx, config.New(), a, acker, true) h.ch <- unenrollPolicy - if h.stateStore != nil { - // backup action for future start to avoid starting fleet gateway loop - h.stateStore.SetAction(a) - if err := h.stateStore.Save(); err != nil { - h.log.Warnf("Failed to update state store: %v", err) - } + // backup action for future start to avoid starting fleet gateway loop + h.stateStore.SetAction(a) + if err := h.stateStore.Save(); err != nil { + h.log.Warnf("Failed to update state store: %v", err) } unenrollCtx, cancel := context.WithTimeout(ctx, unenrollTimeout) diff --git a/internal/pkg/agent/application/actions/handlers/handler_action_unenroll_test.go b/internal/pkg/agent/application/actions/handlers/handler_action_unenroll_test.go index 79ab501d829..101de4cd3df 100644 --- a/internal/pkg/agent/application/actions/handlers/handler_action_unenroll_test.go +++ b/internal/pkg/agent/application/actions/handlers/handler_action_unenroll_test.go @@ -6,12 +6,15 @@ package handlers import ( "context" + "path/filepath" "sync/atomic" "testing" "github.com/elastic/elastic-agent-client/v7/pkg/client" "github.com/elastic/elastic-agent-client/v7/pkg/proto" "github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator" + "github.com/elastic/elastic-agent/internal/pkg/agent/storage" + "github.com/elastic/elastic-agent/internal/pkg/agent/storage/store" "github.com/elastic/elastic-agent/internal/pkg/fleetapi" "github.com/elastic/elastic-agent/pkg/component" "github.com/elastic/elastic-agent/pkg/component/runtime" @@ -105,7 +108,13 @@ func TestActionUnenrollHandler(t *testing.T) { } }() - handler := NewUnenroll(log, coord, ch, nil, nil) + storePath := filepath.Join(t.TempDir(), "state.json") + s, err := storage.NewDiskStore(storePath) + require.NoError(t, err, "failed creating DiskStore") + + st, err := store.NewStateStore(log, s) + require.NoError(t, err) + handler := NewUnenroll(log, coord, ch, nil, st) getTamperProtectionFunc := func(enabled bool) func() bool { return func() bool { @@ -119,9 +128,14 @@ func TestActionUnenrollHandler(t *testing.T) { wantErr error // Handler error wantPerformedActions int64 tamperProtectionFn func() bool + autoUnenroll bool }{ { - name: "no running components", + name: "fleet unenroll with no running components", + }, + { + name: "auto unenroll with no running components", + autoUnenroll: true, }, { name: "endpoint no dispatch", @@ -200,10 +214,17 @@ func TestActionUnenrollHandler(t *testing.T) { handler.tamperProtectionFn = tc.tamperProtectionFn } - err := handler.Handle(ctx, action, acker) + a := new(fleetapi.ActionUnenroll) + *a = *action + if tc.autoUnenroll { + a.IsDetected = true + } + err := handler.Handle(ctx, a, acker) require.ErrorIs(t, err, tc.wantErr) - if tc.wantErr == nil { + + // autoUnenroll isn't acked + if tc.wantErr == nil && !tc.autoUnenroll { require.Len(t, acker.Acked, 1) } require.Equal(t, tc.wantPerformedActions, coord.performedActions.Load()) diff --git a/internal/pkg/agent/storage/store/state_store.go b/internal/pkg/agent/storage/store/state_store.go index f3fdb7f8bda..0c9bb2c5c39 100644 --- a/internal/pkg/agent/storage/store/state_store.go +++ b/internal/pkg/agent/storage/store/state_store.go @@ -10,6 +10,7 @@ import ( "encoding/json" "fmt" "io" + "reflect" "sync" "github.com/elastic/elastic-agent/internal/pkg/agent/storage" @@ -169,6 +170,18 @@ func (s *StateStore) SetAction(a fleetapi.Action) { s.mx.Lock() defer s.mx.Unlock() + // the reflect.ValueOf(v).IsNil() is required as the type of 'v' on switch + // clause with multiple types will, in this case, preserve the original type. + // See details on https://go.dev/ref/spec#Type_switches + // Without using reflect accessing the concrete type stored in the interface + // isn't possible and as a is of type fleetapi.Action and has a concrete + // value, a is never nil, neither v is nil as it has the same type of a + // on both clauses. + if a == nil || reflect.ValueOf(a).IsNil() { + s.log.Debugf("trying to set an nil '%T' action, ignoring the action", a) + return + } + switch v := a.(type) { // If any new action type is added, don't forget to update the method's // description. diff --git a/internal/pkg/agent/storage/store/state_store_test.go b/internal/pkg/agent/storage/store/state_store_test.go index 0c1b18e661c..5ed1ae2d81f 100644 --- a/internal/pkg/agent/storage/store/state_store_test.go +++ b/internal/pkg/agent/storage/store/state_store_test.go @@ -20,7 +20,7 @@ import ( "github.com/elastic/elastic-agent/internal/pkg/agent/storage" "github.com/elastic/elastic-agent/internal/pkg/agent/vault" "github.com/elastic/elastic-agent/internal/pkg/fleetapi" - "github.com/elastic/elastic-agent/pkg/core/logger" + "github.com/elastic/elastic-agent/pkg/core/logger/loggertest" ) type wrongAction struct{} @@ -61,7 +61,93 @@ func createAgentVaultAndSecret(t *testing.T, ctx context.Context, tempDir string } func runTestStateStore(t *testing.T, ackToken string) { - log, _ := logger.New("state_store", false) + log, _ := loggertest.New("state_store") + + t.Run("SetAction corner case", func(t *testing.T) { + + t.Run("nil fleetapi.Action", func(t *testing.T) { + var action fleetapi.Action + + storePath := filepath.Join(t.TempDir(), "state.json") + s, err := storage.NewDiskStore(storePath) + require.NoError(t, err, "failed creating DiskStore") + + store, err := NewStateStore(log, s) + require.NoError(t, err) + require.Nil(t, store.Action()) + + store.SetAction(action) + store.SetAckToken(ackToken) + err = store.Save() + require.NoError(t, err) + + assert.Empty(t, store.Action()) + assert.Empty(t, store.Queue()) + assert.Equal(t, ackToken, store.AckToken()) + }) + + t.Run("nil concrete and accepted action", func(t *testing.T) { + var actionUnenroll *fleetapi.ActionUnenroll + actionPolicyChange := &fleetapi.ActionPolicyChange{ + ActionID: "abc123", + } + + storePath := filepath.Join(t.TempDir(), "state.json") + s, err := storage.NewDiskStore(storePath) + require.NoError(t, err, "failed creating DiskStore") + + store, err := NewStateStore(log, s) + require.NoError(t, err) + require.Nil(t, store.Action()) + + // 1st set an action + store.SetAction(actionPolicyChange) + store.SetAckToken(ackToken) + err = store.Save() + require.NoError(t, err) + + // then try to set a nil action + store.SetAction(actionUnenroll) + store.SetAckToken(ackToken) + err = store.Save() + require.NoError(t, err) + + assert.Equal(t, actionPolicyChange, store.Action()) + assert.Empty(t, store.Queue()) + assert.Equal(t, ackToken, store.AckToken()) + }) + + t.Run("nil concrete and ignored action", func(t *testing.T) { + var actionUnknown *fleetapi.ActionUnknown + actionPolicyChange := &fleetapi.ActionPolicyChange{ + ActionID: "abc123", + } + + storePath := filepath.Join(t.TempDir(), "state.json") + s, err := storage.NewDiskStore(storePath) + require.NoError(t, err, "failed creating DiskStore") + + store, err := NewStateStore(log, s) + require.NoError(t, err) + require.Nil(t, store.Action()) + + // 1st set an action + store.SetAction(actionPolicyChange) + store.SetAckToken(ackToken) + err = store.Save() + require.NoError(t, err) + + // then try to set a nil action + store.SetAction(actionUnknown) + store.SetAckToken(ackToken) + err = store.Save() + require.NoError(t, err) + + assert.Equal(t, actionPolicyChange, store.Action()) + assert.Empty(t, store.Queue()) + assert.Equal(t, ackToken, store.AckToken()) + }) + }) t.Run("store is not dirty on successful save", func(t *testing.T) { storePath := filepath.Join(t.TempDir(), "state.json")