hashicorp · pglass · Feb 3, 2023 · Sep 1, 2022 · Jan 27, 2023 · Jan 27, 2023
@@ -0,0 +1,3 @@
+```release-note:improvement
+agent: only use the `agent` token and do not attempt using a service token for internal deregistration of checks and services
+```
@@ -1293,7 +1293,12 @@ func (l *State) deleteService(key structs.ServiceID) error {
 		return fmt.Errorf("ServiceID missing")
 	}
 
-	st := l.aclTokenForServiceSync(key, l.tokens.AgentToken)
+	// Always use the agent token to delete without trying the service token.
+	// This works because the agent token really must have node:write
+	// permission and node:write allows deregistration of services/checks on
+	// that node. Because the service token may have been deleted, using the
+	// agent token without fallback logic is a bit faster, simpler, and safer.
+	st := l.tokens.AgentToken()
 	req := structs.DeregisterRequest{
 		Datacenter:     l.config.Datacenter,
 		Node:           l.config.NodeName,
@@ -1344,7 +1349,9 @@ func (l *State) deleteCheck(key structs.CheckID) error {
 		return fmt.Errorf("CheckID missing")
 	}
 
-	ct := l.aclTokenForCheckSync(key, l.tokens.AgentToken)
+	// Always use the agent token for deletion. Refer to deleteService() for
+	// an explanation.
+	ct := l.tokens.AgentToken()
 	req := structs.DeregisterRequest{
 		Datacenter:     l.config.Datacenter,
 		Node:           l.config.NodeName,

@@ -829,10 +829,6 @@ func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) {
 }
 
 var testRegisterRules = `
- node "" {
- 	policy = "write"
- }
-
  service "api" {
  	policy = "write"
  }
@@ -863,6 +859,9 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) {
 	defer a.Shutdown()
 	testrpc.WaitForLeader(t, a.RPC, "dc1")
 
+	// The agent token is the only token used for deleteService.
+	setAgentToken(t, a)
+
 	token := createToken(t, a, testRegisterRules)
 
 	// Create service (disallowed)
@@ -1153,15 +1152,19 @@ type RPC interface {
 func createToken(t *testing.T, rpc RPC, policyRules string) string {
 	t.Helper()
 
+	uniqueId, err := uuid.GenerateUUID()
+	require.NoError(t, err)
+	policyName := "the-policy-" + uniqueId
+
 	reqPolicy := structs.ACLPolicySetRequest{
 		Datacenter: "dc1",
 		Policy: structs.ACLPolicy{
-			Name:  "the-policy",
+			Name:  policyName,
 			Rules: policyRules,
 		},
 		WriteRequest: structs.WriteRequest{Token: "root"},
 	}
-	err := rpc.RPC(context.Background(), "ACL.PolicySet", &reqPolicy, &structs.ACLPolicy{})
+	err = rpc.RPC(context.Background(), "ACL.PolicySet", &reqPolicy, &structs.ACLPolicy{})
 	require.NoError(t, err)
 
 	token, err := uuid.GenerateUUID()
@@ -1171,7 +1174,7 @@ func createToken(t *testing.T, rpc RPC, policyRules string) string {
 		Datacenter: "dc1",
 		ACLToken: structs.ACLToken{
 			SecretID: token,
-			Policies: []structs.ACLTokenPolicyLink{{Name: "the-policy"}},
+			Policies: []structs.ACLTokenPolicyLink{{Name: policyName}},
 		},
 		WriteRequest: structs.WriteRequest{Token: "root"},
 	}
@@ -1180,6 +1183,29 @@ func createToken(t *testing.T, rpc RPC, policyRules string) string {
 	return token
 }
 
+// setAgentToken sets the 'agent' token for this agent. It creates a new token
+// with node:write for the agent's node name, and service:write for any
+// service.
+func setAgentToken(t *testing.T, a *agent.TestAgent) {
+	var policy = fmt.Sprintf(`
+	  node "%s" {
+		policy = "write"
+	  }
+	  service_prefix "" {
+		policy = "read"
+	  }
+	`, a.Config.NodeName)
+
+	token := createToken(t, a, policy)
+
+	_, err := a.Client().Agent().UpdateAgentACLToken(token, &api.WriteOptions{Token: "root"})
+	if err != nil {
+		t.Fatalf("setting agent token: %v", err)
+	}
+
+	t.Logf("set agent token: %v", token)
+}
+
 func TestAgentAntiEntropy_Checks(t *testing.T) {
 	if testing.Short() {
 		t.Skip("too slow for testing.Short")
@@ -1481,6 +1507,9 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) {
 
 	testrpc.WaitForLeader(t, a.RPC, dc)
 
+	// The agent token is the only token used for deleteCheck.
+	setAgentToken(t, a)
+
 	token := createToken(t, a, testRegisterRules)
 
 	// Create services using the root token