Skip to content

Commit

Permalink
auth: enforce use of node secret and remove legacy auth
Browse files Browse the repository at this point in the history
As of Nomad 1.6.0, Nomad client agents send their secret with all the
RPCs (other than registration). But for backwards compatibility we had to keep
a legacy auth method that didn't require the node secret. We've previously
announced that this legacy auth method would be removed and that nodes older
than 1.6.0 would not be supported with Nomad 1.9.0.

This changeset removes the legacy auth method.

Ref: https://developer.hashicorp.com/nomad/docs/release-notes/nomad/upcoming#nomad-1-9-0
  • Loading branch information
tgross committed Aug 16, 2024
1 parent d6be784 commit ef5c933
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 69 deletions.
3 changes: 3 additions & 0 deletions .changelog/23838.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:breaking-change
heartbeats: clients older than 1.6.0 will fail heartbeats to 1.9.0+ servers
```
4 changes: 0 additions & 4 deletions nomad/acl.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ func (s *Server) AuthenticateClientOnly(ctx *RPCContext, args structs.RequestWit
return s.auth.AuthenticateClientOnly(ctx, args)
}

func (s *Server) AuthenticateClientOnlyLegacy(ctx *RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) {
return s.auth.AuthenticateClientOnlyLegacy(ctx, args)
}

func (s *Server) ResolveACL(args structs.RequestWithIdentity) (*acl.ACL, error) {
return s.auth.ResolveACL(args)
}
Expand Down
32 changes: 0 additions & 32 deletions nomad/auth/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,38 +328,6 @@ func (s *Authenticator) AuthenticateClientOnly(ctx RPCContext, args structs.Requ
return acl.ClientACL, nil
}

// AuthenticateClientOnlyLegacy is a version of AuthenticateClientOnly that's
// used by a few older RPCs that did not properly enforce node secrets.
// COMPAT(1.8.0): In Nomad 1.6.0 we started sending those node secrets, so we
// can remove this in Nomad 1.8.0.
func (s *Authenticator) AuthenticateClientOnlyLegacy(ctx RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) {

remoteIP, err := ctx.GetRemoteIP() // capture for metrics
if err != nil {
s.logger.Error("could not determine remote address", "error", err)
}

identity := &structs.AuthenticatedIdentity{RemoteIP: remoteIP}
defer args.SetIdentity(identity) // always set the identity, even on errors

if s.verifyTLS && !ctx.IsStatic() {
tlsCert := ctx.Certificate()
if tlsCert == nil {
return nil, errors.New("missing certificate information")
}

// set on the identity whether or not its valid for server RPC, so we
// can capture it for metrics
identity.TLSName = tlsCert.Subject.CommonName
_, err := validateCertificateForNames(tlsCert, s.validClientCertNames)
if err != nil {
return nil, err
}
}

return acl.ClientACL, nil
}

// validateCertificateForNames returns true if the certificate is valid for any
// of the given domain names.
func validateCertificateForNames(cert *x509.Certificate, expectedNames []string) (bool, error) {
Expand Down
14 changes: 10 additions & 4 deletions nomad/node_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,8 @@ func (n *Node) deregister(args *structs.NodeBatchDeregisterRequest,
// │ │
// └──── ready ─────┘
func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error {
// UpdateStatus receives requests from clients and servers that mark failed
// heartbeats, so we can't use AuthenticateClientOnly
authErr := n.srv.Authenticate(n.ctx, args)

isForwarded := args.IsForwarded()
Expand All @@ -560,6 +562,12 @@ func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *struct

defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now())

if aclObj, err := n.srv.ResolveACL(args); err != nil {
return structs.ErrPermissionDenied
} else if !(aclObj.AllowClientOp() || aclObj.AllowServerOp()) {
return structs.ErrPermissionDenied
}

// Verify the arguments
if args.NodeID == "" {
return fmt.Errorf("missing node ID for client status update")
Expand Down Expand Up @@ -1296,8 +1304,7 @@ func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest,
// - The node status is down or disconnected. Clients must call the
// UpdateStatus method to update their status in the server.
func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error {
// COMPAT(1.9.0): move to AuthenticateClientOnly
aclObj, err := n.srv.AuthenticateClientOnlyLegacy(n.ctx, args)
aclObj, err := n.srv.AuthenticateClientOnly(n.ctx, args)
n.srv.MeasureRPCRate("node", structs.RateMetricWrite, args)
if err != nil {
return structs.ErrPermissionDenied
Expand Down Expand Up @@ -2230,8 +2237,7 @@ func taskUsesConnect(task *structs.Task) bool {
}

func (n *Node) EmitEvents(args *structs.EmitNodeEventsRequest, reply *structs.EmitNodeEventsResponse) error {
// COMPAT(1.9.0): move to AuthenticateClientOnly
aclObj, err := n.srv.AuthenticateClientOnlyLegacy(n.ctx, args)
aclObj, err := n.srv.AuthenticateClientOnly(n.ctx, args)
n.srv.MeasureRPCRate("node", structs.RateMetricWrite, args)
if err != nil {
return structs.ErrPermissionDenied
Expand Down
53 changes: 27 additions & 26 deletions nomad/node_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ func TestClientEndpoint_Register_NodePool_Multiregion(t *testing.T) {
go heartbeat(ctx, rpcClient(t, s1), &structs.NodeUpdateStatusRequest{
NodeID: node1.ID,
Status: structs.NodeStatusReady,
WriteRequest: structs.WriteRequest{Region: "region-1"},
WriteRequest: structs.WriteRequest{Region: "region-1", AuthToken: node1.SecretID},
})

// Verify client becomes ready.
Expand Down Expand Up @@ -403,7 +403,7 @@ func TestClientEndpoint_Register_NodePool_Multiregion(t *testing.T) {
go heartbeat(ctx, rpcClient(t, s2), &structs.NodeUpdateStatusRequest{
NodeID: node2.ID,
Status: structs.NodeStatusReady,
WriteRequest: structs.WriteRequest{Region: "region-2"},
WriteRequest: structs.WriteRequest{Region: "region-2", AuthToken: node2.SecretID},
})

// Verify client is kept at the initializing status and has a node pool
Expand Down Expand Up @@ -526,7 +526,7 @@ func TestClientEndpoint_DeregisterOne(t *testing.T) {
// Deregister
dereg := &structs.NodeDeregisterRequest{
NodeID: node.ID,
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}
var resp2 structs.GenericResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.Deregister", dereg, &resp2); err != nil {
Expand Down Expand Up @@ -574,7 +574,7 @@ func TestClientEndpoint_Deregister_ACL(t *testing.T) {
// Deregister without any token and expect it to fail
dereg := &structs.NodeBatchDeregisterRequest{
NodeIDs: []string{node.ID},
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}
var resp structs.GenericResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.BatchDeregister", dereg, &resp); err == nil {
Expand All @@ -600,7 +600,7 @@ func TestClientEndpoint_Deregister_ACL(t *testing.T) {
// Deregister with an invalid token.
dereg1 := &structs.NodeBatchDeregisterRequest{
NodeIDs: []string{node1.ID},
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node1.SecretID},
}
dereg1.AuthToken = invalidToken.SecretID
if err := msgpackrpc.CallWithCodec(codec, "Node.BatchDeregister", dereg1, &resp); err == nil {
Expand Down Expand Up @@ -650,7 +650,7 @@ func TestClientEndpoint_Deregister_Vault(t *testing.T) {
// Deregister
dereg := &structs.NodeBatchDeregisterRequest{
NodeIDs: []string{node.ID},
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}
var resp2 structs.GenericResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.BatchDeregister", dereg, &resp2); err != nil {
Expand Down Expand Up @@ -712,7 +712,7 @@ func TestClientEndpoint_Deregister_Vault_WorkloadIdentity(t *testing.T) {
// are revoked.
dereg := &structs.NodeDeregisterRequest{
NodeID: node.ID,
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}
var resp2 structs.GenericResponse
err = msgpackrpc.CallWithCodec(codec, "Node.Deregister", dereg, &resp2)
Expand Down Expand Up @@ -761,7 +761,7 @@ func TestClientEndpoint_UpdateStatus(t *testing.T) {
dereg := &structs.NodeUpdateStatusRequest{
NodeID: node.ID,
Status: structs.NodeStatusInit,
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}
var resp2 structs.NodeUpdateResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.UpdateStatus", dereg, &resp2); err != nil {
Expand Down Expand Up @@ -849,7 +849,7 @@ func TestClientEndpoint_UpdateStatus_Vault(t *testing.T) {
dereg := &structs.NodeUpdateStatusRequest{
NodeID: node.ID,
Status: structs.NodeStatusDown,
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}
var resp2 structs.NodeUpdateResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.UpdateStatus", dereg, &resp2); err != nil {
Expand Down Expand Up @@ -902,7 +902,7 @@ func TestClientEndpoint_UpdateStatus_Vault_WorkloadIdentity(t *testing.T) {
updateReq := &structs.NodeUpdateStatusRequest{
NodeID: node.ID,
Status: structs.NodeStatusDown,
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}
var resp2 structs.NodeUpdateResponse
err = msgpackrpc.CallWithCodec(codec, "Node.UpdateStatus", updateReq, &resp2)
Expand Down Expand Up @@ -974,7 +974,6 @@ func TestClientEndpoint_UpdateStatus_Reconnect(t *testing.T) {
heartbeat := func(ctx context.Context) {
ticker := time.NewTicker(heartbeatTTL / 2)
defer ticker.Stop()

for {
select {
case <-ctx.Done():
Expand All @@ -983,11 +982,12 @@ func TestClientEndpoint_UpdateStatus_Reconnect(t *testing.T) {
if t.Failed() {
return
}

req := &structs.NodeUpdateStatusRequest{
NodeID: node.ID,
Status: structs.NodeStatusReady,
WriteRequest: structs.WriteRequest{Region: "global"},
NodeID: node.ID,
Status: structs.NodeStatusReady,
WriteRequest: structs.WriteRequest{
Region: "global",
AuthToken: node.SecretID},
}
var resp structs.NodeUpdateResponse
// Ignore errors since an unexpected failed heartbeat will cause
Expand Down Expand Up @@ -1049,7 +1049,8 @@ func TestClientEndpoint_UpdateStatus_Reconnect(t *testing.T) {
allocUpdateReq := &structs.AllocUpdateRequest{
Alloc: []*structs.Allocation{alloc},
WriteRequest: structs.WriteRequest{
Region: "global",
Region: "global",
AuthToken: node.SecretID,
},
}
var resp structs.GenericResponse
Expand Down Expand Up @@ -1163,7 +1164,7 @@ func TestClientEndpoint_UpdateStatus_HeartbeatRecovery(t *testing.T) {
dereg := &structs.NodeUpdateStatusRequest{
NodeID: node.ID,
Status: structs.NodeStatusInit,
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}
var resp2 structs.NodeUpdateResponse
require.NoError(msgpackrpc.CallWithCodec(codec, "Node.UpdateStatus", dereg, &resp2))
Expand Down Expand Up @@ -1253,7 +1254,7 @@ func TestClientEndpoint_Register_GetEvals(t *testing.T) {
req := &structs.NodeUpdateStatusRequest{
NodeID: node.ID,
Status: structs.NodeStatusDown,
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}
if err := msgpackrpc.CallWithCodec(codec, "Node.UpdateStatus", req, &resp); err != nil {
t.Fatalf("err: %v", err)
Expand All @@ -1266,7 +1267,7 @@ func TestClientEndpoint_Register_GetEvals(t *testing.T) {
req = &structs.NodeUpdateStatusRequest{
NodeID: node.ID,
Status: structs.NodeStatusReady,
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}
if err := msgpackrpc.CallWithCodec(codec, "Node.UpdateStatus", req, &resp); err != nil {
t.Fatalf("err: %v", err)
Expand Down Expand Up @@ -1316,7 +1317,7 @@ func TestClientEndpoint_UpdateStatus_GetEvals(t *testing.T) {
update := &structs.NodeUpdateStatusRequest{
NodeID: node.ID,
Status: structs.NodeStatusReady,
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}
var resp2 structs.NodeUpdateResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.UpdateStatus", update, &resp2); err != nil {
Expand Down Expand Up @@ -1422,7 +1423,7 @@ func TestClientEndpoint_UpdateStatus_HeartbeatOnly(t *testing.T) {
dereg := &structs.NodeUpdateStatusRequest{
NodeID: node.ID,
Status: node.Status,
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}
var resp2 structs.NodeUpdateResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.UpdateStatus", dereg, &resp2); err != nil {
Expand Down Expand Up @@ -1514,7 +1515,7 @@ func TestNode_UpdateStatus_ServiceRegistrations(t *testing.T) {
args := structs.NodeUpdateStatusRequest{
NodeID: node.ID,
Status: structs.NodeStatusDown,
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}

var reply structs.NodeUpdateResponse
Expand Down Expand Up @@ -1988,7 +1989,7 @@ func TestClientEndpoint_Drain_Down(t *testing.T) {
req := &structs.NodeUpdateStatusRequest{
NodeID: node.ID,
Status: structs.NodeStatusDown,
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}
require.Nil(msgpackrpc.CallWithCodec(codec, "Node.UpdateStatus", req, &resp))

Expand Down Expand Up @@ -3418,7 +3419,7 @@ func TestClientEndpoint_UpdateAlloc_Vault(t *testing.T) {
// Update the alloc
update := &structs.AllocUpdateRequest{
Alloc: []*structs.Allocation{clientAlloc},
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}
var resp2 structs.NodeAllocsResponse
start := time.Now()
Expand Down Expand Up @@ -3501,7 +3502,7 @@ func TestClientEndpoint_UpdateAlloc_VaultWorkloadIdentity(t *testing.T) {

update := &structs.AllocUpdateRequest{
Alloc: []*structs.Allocation{clientAlloc},
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}
var resp2 structs.NodeAllocsResponse
err = msgpackrpc.CallWithCodec(codec, "Node.UpdateAlloc", update, &resp2)
Expand Down Expand Up @@ -4598,7 +4599,7 @@ func TestClientEndpoint_EmitEvents(t *testing.T) {
nodeEvents := map[string][]*structs.NodeEvent{node.ID: {nodeEvent}}
req := structs.EmitNodeEventsRequest{
NodeEvents: nodeEvents,
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: node.SecretID},
}

var resp structs.GenericResponse
Expand Down
16 changes: 13 additions & 3 deletions website/content/docs/upgrade/upgrade-specific.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,16 @@ upgrade. However, specific versions of Nomad may have more details provided for
their upgrades as a result of new features or changed behavior. This page is
used to document those details separately from the standard upgrade flow.

## Nomad 1.8.3 (UNRELEASED)
## Nomad 1.9.0

#### Dropped support for older clients

Nomad 1.9.0 removes support for Nomad client agents older than 1.6.0. Older
nodes will fail heartbeats. Nomad servers will mark the workloads on those nodes
as lost and reschedule them normally according to the job's [reschedule][]
block.

## Nomad 1.8.3

#### Nomad keyring rotation

Expand Down Expand Up @@ -82,7 +91,7 @@ In Nomad 1.7.0 the `raw_exec` plugin option for `no_cgroups` became ineffective.
Starting in Nomad 1.8.0 attempting to set the `no_cgroups` in `raw_exec` plugin
configuration will result in an error when starting the agent.

## Nomad 1.7.11 (UNRELEASED)
## Nomad 1.7.11

<EnterpriseAlert inline />

Expand Down Expand Up @@ -261,7 +270,7 @@ cgroup to halt the process group of a Task before issuing a kill signal to each
process. Starting in Nomad 1.7.0 this behavior is always enabled (and a similar
mechanism has always been enabled on cgroups v2 systems).

## Nomad 1.6.14 (UNRELEASED)
## Nomad 1.6.14

<EnterpriseAlert inline />

Expand Down Expand Up @@ -2164,3 +2173,4 @@ deleted and then Nomad 0.3.0 can be launched.
[vault_grace]: /nomad/docs/job-specification/template
[Workload Identity]: /nomad/docs/concepts/workload-identity
[Process Isolation]: https://learn.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/hyperv-container#process-isolation
[reschedule]: /nomad/docs/job-specification/reschedule

0 comments on commit ef5c933

Please sign in to comment.