From 95265ef1353e2de15f2e4fa026b9ab4e46e53a14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vesa=20Hagstr=C3=B6m?= Date: Thu, 1 Sep 2022 13:25:27 +0300 Subject: [PATCH 1/6] In case of an "ACL not found" error remove the token associated with the service and check in the state, so that further attemts fall back to using the agent token. --- agent/local/state.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/agent/local/state.go b/agent/local/state.go index d6ff091ad4df..c2bba98a0b10 100644 --- a/agent/local/state.go +++ b/agent/local/state.go @@ -1318,7 +1318,10 @@ func (l *State) deleteService(key structs.ServiceID) error { l.logger.Info("Deregistered service", "service", key.ID) return nil - case acl.IsErrPermissionDenied(err), acl.IsErrNotFound(err): + case acl.IsErrNotFound(err): + l.services[key].Token = "" + fallthrough + case acl.IsErrPermissionDenied(err): // todo(fs): mark the service to be in sync to prevent excessive retrying before next full sync // todo(fs): some backoff strategy might be a better solution l.services[key].InSync = true @@ -1359,8 +1362,10 @@ func (l *State) deleteCheck(key structs.CheckID) error { l.pruneCheck(key) l.logger.Info("Deregistered check", "check", key.String()) return nil - - case acl.IsErrPermissionDenied(err), acl.IsErrNotFound(err): + case acl.IsErrNotFound(err): + l.checks[key].Token = "" + fallthrough + case acl.IsErrPermissionDenied(err): // todo(fs): mark the check to be in sync to prevent excessive retrying before next full sync // todo(fs): some backoff strategy might be a better solution l.checks[key].InSync = true From 31c34dddfdc4d9717dcee2d3e9d0ddbbfd1aa82c Mon Sep 17 00:00:00 2001 From: Paul Glass Date: Fri, 27 Jan 2023 12:19:50 -0600 Subject: [PATCH 2/6] Use only the agent token for deregistration during anti-entropy The previous behavior had the agent attempt to use the "service" token (i.e. from the `token` field in a service definition file), and if that was not set, then it would use the agent token. The previous behavior was problematic because, if the service token had been deleted, the deregistration request would fail. The agent would retry the deregistration during each anti-entropy sync, and the situation would never resolve. The new behavior is to only/always use the agent token to service/check deregistration during anti-entropy. This is: * Simpler: No fallback logic to try different tokens * Faster (slightly): No time spent attempting the service token * Correct: The agent token is able to deregister services on that agent's node, because: * node:write permissions allow deregistration of services/checks on that node. * The agent token must have node:write permission, or else the agent is not be able to (de)register itself into the catalog --- agent/local/state.go | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/agent/local/state.go b/agent/local/state.go index c2bba98a0b10..d3ffa1e023ed 100644 --- a/agent/local/state.go +++ b/agent/local/state.go @@ -1293,7 +1293,12 @@ func (l *State) deleteService(key structs.ServiceID) error { return fmt.Errorf("ServiceID missing") } - st := l.aclTokenForServiceSync(key, l.tokens.AgentToken) + // Always use the agent token to delete without trying the service token. + // This works because the agent token really must have node:write + // permission and node:write allows deregistration of services/checks on + // that node. Because the service token may have been deleted, using the + // agent token without fallback logic is a bit faster, simpler, and safer. + st := l.tokens.AgentToken() req := structs.DeregisterRequest{ Datacenter: l.config.Datacenter, Node: l.config.NodeName, @@ -1318,10 +1323,7 @@ func (l *State) deleteService(key structs.ServiceID) error { l.logger.Info("Deregistered service", "service", key.ID) return nil - case acl.IsErrNotFound(err): - l.services[key].Token = "" - fallthrough - case acl.IsErrPermissionDenied(err): + case acl.IsErrPermissionDenied(err), acl.IsErrNotFound(err): // todo(fs): mark the service to be in sync to prevent excessive retrying before next full sync // todo(fs): some backoff strategy might be a better solution l.services[key].InSync = true @@ -1347,7 +1349,9 @@ func (l *State) deleteCheck(key structs.CheckID) error { return fmt.Errorf("CheckID missing") } - ct := l.aclTokenForCheckSync(key, l.tokens.AgentToken) + // Always use the agent token for deletion. Refer to deleteService() for + // an explanation. + ct := l.tokens.AgentToken() req := structs.DeregisterRequest{ Datacenter: l.config.Datacenter, Node: l.config.NodeName, @@ -1362,10 +1366,8 @@ func (l *State) deleteCheck(key structs.CheckID) error { l.pruneCheck(key) l.logger.Info("Deregistered check", "check", key.String()) return nil - case acl.IsErrNotFound(err): - l.checks[key].Token = "" - fallthrough - case acl.IsErrPermissionDenied(err): + + case acl.IsErrPermissionDenied(err), acl.IsErrNotFound(err): // todo(fs): mark the check to be in sync to prevent excessive retrying before next full sync // todo(fs): some backoff strategy might be a better solution l.checks[key].InSync = true From 08c737fc037a8c05ad4c19140714cd18af835161 Mon Sep 17 00:00:00 2001 From: Paul Glass Date: Fri, 27 Jan 2023 13:44:46 -0600 Subject: [PATCH 3/6] Add changelog --- .changelog/16097.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/16097.txt diff --git a/.changelog/16097.txt b/.changelog/16097.txt new file mode 100644 index 000000000000..f707f2598e62 --- /dev/null +++ b/.changelog/16097.txt @@ -0,0 +1,3 @@ +```release-note:improvement +agent: only use the `agent` token and do not attempt using a service token for internal deregistration of checks and services +``` From ff9424b30c42e86479aeb3f055b0405eae2b687a Mon Sep 17 00:00:00 2001 From: Paul Glass Date: Sat, 28 Jan 2023 12:25:38 -0600 Subject: [PATCH 4/6] Update tests for agent token deregistration fix --- agent/local/state_test.go | 43 ++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/agent/local/state_test.go b/agent/local/state_test.go index 63256f8485f2..35a3869131f2 100644 --- a/agent/local/state_test.go +++ b/agent/local/state_test.go @@ -829,10 +829,6 @@ func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) { } var testRegisterRules = ` - node "" { - policy = "write" - } - service "api" { policy = "write" } @@ -863,6 +859,9 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) { defer a.Shutdown() testrpc.WaitForLeader(t, a.RPC, "dc1") + // The agent token is the only token used for deleteService. + setAgentToken(t, a) + token := createToken(t, a, testRegisterRules) // Create service (disallowed) @@ -1153,15 +1152,19 @@ type RPC interface { func createToken(t *testing.T, rpc RPC, policyRules string) string { t.Helper() + uniqueId, err := uuid.GenerateUUID() + require.NoError(t, err) + policyName := "the-policy-" + uniqueId + reqPolicy := structs.ACLPolicySetRequest{ Datacenter: "dc1", Policy: structs.ACLPolicy{ - Name: "the-policy", + Name: policyName, Rules: policyRules, }, WriteRequest: structs.WriteRequest{Token: "root"}, } - err := rpc.RPC(context.Background(), "ACL.PolicySet", &reqPolicy, &structs.ACLPolicy{}) + err = rpc.RPC(context.Background(), "ACL.PolicySet", &reqPolicy, &structs.ACLPolicy{}) require.NoError(t, err) token, err := uuid.GenerateUUID() @@ -1171,7 +1174,7 @@ func createToken(t *testing.T, rpc RPC, policyRules string) string { Datacenter: "dc1", ACLToken: structs.ACLToken{ SecretID: token, - Policies: []structs.ACLTokenPolicyLink{{Name: "the-policy"}}, + Policies: []structs.ACLTokenPolicyLink{{Name: policyName}}, }, WriteRequest: structs.WriteRequest{Token: "root"}, } @@ -1180,6 +1183,29 @@ func createToken(t *testing.T, rpc RPC, policyRules string) string { return token } +// setAgentToken sets the 'agent' token for this agent. It creates a new token +// with node:write for the agent's node name, and service:write for any +// service. +func setAgentToken(t *testing.T, a *agent.TestAgent) { + var policy = fmt.Sprintf(` + node "%s" { + policy = "write" + } + service_prefix "" { + policy = "read" + } + `, a.Config.NodeName) + + token := createToken(t, a, policy) + + _, err := a.Client().Agent().UpdateAgentACLToken(token, &api.WriteOptions{Token: "root"}) + if err != nil { + t.Fatalf("setting agent token: %v", err) + } + + t.Logf("set agent token: %v", token) +} + func TestAgentAntiEntropy_Checks(t *testing.T) { if testing.Short() { t.Skip("too slow for testing.Short") @@ -1481,6 +1507,9 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) { testrpc.WaitForLeader(t, a.RPC, dc) + // The agent token is the only token used for deleteCheck. + setAgentToken(t, a) + token := createToken(t, a, testRegisterRules) // Create services using the root token From 7ef8f7b9533fa1f6a4f3b84ac2d39f6351c5cbf4 Mon Sep 17 00:00:00 2001 From: Paul Glass Date: Mon, 30 Jan 2023 13:57:36 -0600 Subject: [PATCH 5/6] Address feedback --- .changelog/16097.txt | 4 ++-- agent/local/state_test.go | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.changelog/16097.txt b/.changelog/16097.txt index f707f2598e62..a66d76690c58 100644 --- a/.changelog/16097.txt +++ b/.changelog/16097.txt @@ -1,3 +1,3 @@ -```release-note:improvement -agent: only use the `agent` token and do not attempt using a service token for internal deregistration of checks and services +```release-note:bug-fix +agent: Only use the `agent` token for internal deregistration of checks and services during anti-entropy syncing. The service token, specified in the `token` field of the check or service definition, is no longer used for deregistration. This fixes an issue where the agent failed to ever deregseter a service or check if the service token was deleted. ``` diff --git a/agent/local/state_test.go b/agent/local/state_test.go index 35a3869131f2..6052fa4784d0 100644 --- a/agent/local/state_test.go +++ b/agent/local/state_test.go @@ -1202,8 +1202,6 @@ func setAgentToken(t *testing.T, a *agent.TestAgent) { if err != nil { t.Fatalf("setting agent token: %v", err) } - - t.Logf("set agent token: %v", token) } func TestAgentAntiEntropy_Checks(t *testing.T) { From c5862eb8161e7fe2ba36cbf654300e765edd5594 Mon Sep 17 00:00:00 2001 From: Paul Glass Date: Mon, 30 Jan 2023 14:43:37 -0600 Subject: [PATCH 6/6] Fix changelog --- .changelog/16097.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changelog/16097.txt b/.changelog/16097.txt index a66d76690c58..e25bf8961a8e 100644 --- a/.changelog/16097.txt +++ b/.changelog/16097.txt @@ -1,3 +1,3 @@ ```release-note:bug-fix -agent: Only use the `agent` token for internal deregistration of checks and services during anti-entropy syncing. The service token, specified in the `token` field of the check or service definition, is no longer used for deregistration. This fixes an issue where the agent failed to ever deregseter a service or check if the service token was deleted. +agent: Only use the `agent` token for internal deregistration of checks and services during anti-entropy syncing. The service token specified in the `token` field of the check or service definition is no longer used for deregistration. This fixes an issue where the agent failed to ever deregister a service or check if the service token was deleted. ```