Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use agent token for service/check deregistration during anti-entropy #16097

Merged
merged 6 commits into from
Feb 3, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/16097.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
agent: only use the `agent` token and do not attempt using a service token for internal deregistration of checks and services
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you think of a scenario by which someone may view this as a breaking change in behavior that they were relying on?

Copy link
Author

@pglass pglass Jan 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess I could phrase this as a breaking change by saying: You now must have an agent token configured with node:write permission for a client agent to deregister services and checks.

(I feel like this was sort of already a requirement for the agent to function correctly though.)

I was also considering bug-fix here, since it fixes the case where a service token has been deleted and the agent never deregisters the service.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I updated the changelog entry to be a bug-fix now. Let me know what you think!

```
11 changes: 9 additions & 2 deletions agent/local/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -1293,7 +1293,12 @@ func (l *State) deleteService(key structs.ServiceID) error {
return fmt.Errorf("ServiceID missing")
}

st := l.aclTokenForServiceSync(key, l.tokens.AgentToken)
// Always use the agent token to delete without trying the service token.
// This works because the agent token really must have node:write
// permission and node:write allows deregistration of services/checks on
// that node. Because the service token may have been deleted, using the
// agent token without fallback logic is a bit faster, simpler, and safer.
st := l.tokens.AgentToken()
req := structs.DeregisterRequest{
Datacenter: l.config.Datacenter,
Node: l.config.NodeName,
Expand Down Expand Up @@ -1344,7 +1349,9 @@ func (l *State) deleteCheck(key structs.CheckID) error {
return fmt.Errorf("CheckID missing")
}

ct := l.aclTokenForCheckSync(key, l.tokens.AgentToken)
// Always use the agent token for deletion. Refer to deleteService() for
// an explanation.
ct := l.tokens.AgentToken()
req := structs.DeregisterRequest{
Datacenter: l.config.Datacenter,
Node: l.config.NodeName,
Expand Down
43 changes: 36 additions & 7 deletions agent/local/state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -829,10 +829,6 @@ func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) {
}

var testRegisterRules = `
node "" {
policy = "write"
}

service "api" {
policy = "write"
}
Expand Down Expand Up @@ -863,6 +859,9 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) {
defer a.Shutdown()
testrpc.WaitForLeader(t, a.RPC, "dc1")

// The agent token is the only token used for deleteService.
setAgentToken(t, a)

token := createToken(t, a, testRegisterRules)

// Create service (disallowed)
Expand Down Expand Up @@ -1153,15 +1152,19 @@ type RPC interface {
func createToken(t *testing.T, rpc RPC, policyRules string) string {
t.Helper()

uniqueId, err := uuid.GenerateUUID()
require.NoError(t, err)
policyName := "the-policy-" + uniqueId

reqPolicy := structs.ACLPolicySetRequest{
Datacenter: "dc1",
Policy: structs.ACLPolicy{
Name: "the-policy",
Name: policyName,
Rules: policyRules,
},
WriteRequest: structs.WriteRequest{Token: "root"},
}
err := rpc.RPC(context.Background(), "ACL.PolicySet", &reqPolicy, &structs.ACLPolicy{})
err = rpc.RPC(context.Background(), "ACL.PolicySet", &reqPolicy, &structs.ACLPolicy{})
require.NoError(t, err)

token, err := uuid.GenerateUUID()
Expand All @@ -1171,7 +1174,7 @@ func createToken(t *testing.T, rpc RPC, policyRules string) string {
Datacenter: "dc1",
ACLToken: structs.ACLToken{
SecretID: token,
Policies: []structs.ACLTokenPolicyLink{{Name: "the-policy"}},
Policies: []structs.ACLTokenPolicyLink{{Name: policyName}},
},
WriteRequest: structs.WriteRequest{Token: "root"},
}
Expand All @@ -1180,6 +1183,29 @@ func createToken(t *testing.T, rpc RPC, policyRules string) string {
return token
}

// setAgentToken sets the 'agent' token for this agent. It creates a new token
// with node:write for the agent's node name, and service:write for any
// service.
func setAgentToken(t *testing.T, a *agent.TestAgent) {
var policy = fmt.Sprintf(`
node "%s" {
policy = "write"
}
service_prefix "" {
policy = "read"
}
`, a.Config.NodeName)

token := createToken(t, a, policy)

_, err := a.Client().Agent().UpdateAgentACLToken(token, &api.WriteOptions{Token: "root"})
if err != nil {
t.Fatalf("setting agent token: %v", err)
}

t.Logf("set agent token: %v", token)
}

func TestAgentAntiEntropy_Checks(t *testing.T) {
if testing.Short() {
t.Skip("too slow for testing.Short")
Expand Down Expand Up @@ -1481,6 +1507,9 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) {

testrpc.WaitForLeader(t, a.RPC, dc)

// The agent token is the only token used for deleteCheck.
setAgentToken(t, a)

token := createToken(t, a, testRegisterRules)

// Create services using the root token
Expand Down