Skip to content

Commit

Permalink
auth: enforce use of node secret and remove legacy auth
Browse files Browse the repository at this point in the history
As of Nomad 1.6.0, Nomad client agents send their secret with all the
RPCs (other than registration). But for backwards compatibility we had to keep
a legacy auth method that didn't require the node secret. We've previously
announced that this legacy auth method would be removed and that nodes older
than 1.6.0 would not be supported with Nomad 1.9.0.

This changeset removes the legacy auth method.

Ref: https://developer.hashicorp.com/nomad/docs/release-notes/nomad/upcoming#nomad-1-9-0
  • Loading branch information
tgross committed Sep 3, 2024
1 parent c43e30a commit b178243
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 87 deletions.
3 changes: 3 additions & 0 deletions .changelog/23838.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:breaking-change
heartbeats: clients older than 1.6.0 will fail heartbeats to 1.9.0+ servers
```
4 changes: 0 additions & 4 deletions nomad/acl.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ func (s *Server) AuthenticateClientOnly(ctx *RPCContext, args structs.RequestWit
return s.auth.AuthenticateClientOnly(ctx, args)
}

func (s *Server) AuthenticateClientOnlyLegacy(ctx *RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) {
return s.auth.AuthenticateClientOnlyLegacy(ctx, args)
}

func (s *Server) ResolveACL(args structs.RequestWithIdentity) (*acl.ACL, error) {
return s.auth.ResolveACL(args)
}
Expand Down
32 changes: 0 additions & 32 deletions nomad/auth/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,38 +328,6 @@ func (s *Authenticator) AuthenticateClientOnly(ctx RPCContext, args structs.Requ
return acl.ClientACL, nil
}

// AuthenticateClientOnlyLegacy is a version of AuthenticateClientOnly that's
// used by a few older RPCs that did not properly enforce node secrets.
// COMPAT(1.8.0): In Nomad 1.6.0 we starting sending those node secrets, so we
// can remove this in Nomad 1.8.0.
func (s *Authenticator) AuthenticateClientOnlyLegacy(ctx RPCContext, args structs.RequestWithIdentity) (*acl.ACL, error) {

remoteIP, err := ctx.GetRemoteIP() // capture for metrics
if err != nil {
s.logger.Error("could not determine remote address", "error", err)
}

identity := &structs.AuthenticatedIdentity{RemoteIP: remoteIP}
defer args.SetIdentity(identity) // always set the identity, even on errors

if s.verifyTLS && !ctx.IsStatic() {
tlsCert := ctx.Certificate()
if tlsCert == nil {
return nil, errors.New("missing certificate information")
}

// set on the identity whether or not its valid for server RPC, so we
// can capture it for metrics
identity.TLSName = tlsCert.Subject.CommonName
_, err := validateCertificateForNames(tlsCert, s.validClientCertNames)
if err != nil {
return nil, err
}
}

return acl.ClientACL, nil
}

// validateCertificateForNames returns true if the certificate is valid for any
// of the given domain names.
func validateCertificateForNames(cert *x509.Certificate, expectedNames []string) (bool, error) {
Expand Down
28 changes: 14 additions & 14 deletions nomad/drainer_int_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import (
// healthy deployments, and service allocations that are DesiredStatus=stop on
// the server get updates with terminal client status.
func allocClientStateSimulator(t *testing.T, errCh chan<- error, ctx context.Context,
srv *Server, nodeID string, logger log.Logger) {
srv *Server, nodeID, nodeSecret string, logger log.Logger) {

codec := rpcClient(t, srv)
store := srv.State()
Expand Down Expand Up @@ -88,7 +88,7 @@ func allocClientStateSimulator(t *testing.T, errCh chan<- error, ctx context.Con
// Send the update
req := &structs.AllocUpdateRequest{
Alloc: updates,
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: nodeSecret},
}
var resp structs.GenericResponse
if err := msgpackrpc.CallWithCodec(codec, "Node.UpdateAlloc", req, &resp); err != nil {
Expand Down Expand Up @@ -202,8 +202,8 @@ func TestDrainer_Simple_ServiceOnly(t *testing.T) {
errCh := make(chan error, 2)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go allocClientStateSimulator(t, errCh, ctx, srv, n1.ID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n2.ID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n1.ID, n1.SecretID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n2.ID, n2.SecretID, srv.logger)

// Wait for the allocs to be replaced
waitForAllocsStop(t, store, n1.ID, nil)
Expand Down Expand Up @@ -390,8 +390,8 @@ func TestDrainer_AllTypes_Deadline(t *testing.T) {
errCh := make(chan error, 2)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go allocClientStateSimulator(t, errCh, ctx, srv, n1.ID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n2.ID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n1.ID, n2.SecretID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n2.ID, n2.SecretID, srv.logger)

// Wait for allocs to be replaced
finalAllocs := waitForAllocsStop(t, store, n1.ID, nil)
Expand Down Expand Up @@ -498,8 +498,8 @@ func TestDrainer_AllTypes_NoDeadline(t *testing.T) {
errCh := make(chan error, 2)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go allocClientStateSimulator(t, errCh, ctx, srv, n1.ID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n2.ID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n1.ID, n1.SecretID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n2.ID, n2.SecretID, srv.logger)

// Wait for the service allocs (only) to be stopped on the draining node
must.Wait(t, wait.InitialSuccess(wait.ErrorFunc(func() error {
Expand Down Expand Up @@ -532,7 +532,7 @@ func TestDrainer_AllTypes_NoDeadline(t *testing.T) {

batchDoneReq := &structs.AllocUpdateRequest{
Alloc: updates,
WriteRequest: structs.WriteRequest{Region: "global"},
WriteRequest: structs.WriteRequest{Region: "global", AuthToken: n1.SecretID},
}
err = msgpackrpc.CallWithCodec(codec, "Node.UpdateAlloc", batchDoneReq, &resp)
must.NoError(t, err)
Expand Down Expand Up @@ -648,8 +648,8 @@ func TestDrainer_AllTypes_Deadline_GarbageCollectedNode(t *testing.T) {
errCh := make(chan error, 2)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go allocClientStateSimulator(t, errCh, ctx, srv, n1.ID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n2.ID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n1.ID, n1.SecretID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n2.ID, n2.SecretID, srv.logger)

// Wait for the allocs to be replaced
waitForAllocsStop(t, store, n1.ID, errCh)
Expand Down Expand Up @@ -737,8 +737,8 @@ func TestDrainer_MultipleNSes_ServiceOnly(t *testing.T) {
errCh := make(chan error, 2)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go allocClientStateSimulator(t, errCh, ctx, srv, n1.ID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n2.ID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n1.ID, n1.SecretID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n2.ID, n2.SecretID, srv.logger)

// Wait for the allocs to be replaced
waitForAllocsStop(t, store, n1.ID, errCh)
Expand Down Expand Up @@ -815,7 +815,7 @@ func TestDrainer_Batch_TransitionToForce(t *testing.T) {
errCh := make(chan error, 1)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go allocClientStateSimulator(t, errCh, ctx, srv, n1.ID, srv.logger)
go allocClientStateSimulator(t, errCh, ctx, srv, n1.ID, n1.SecretID, srv.logger)

// Make sure the batch job isn't affected
must.Wait(t, wait.ContinualSuccess(wait.ErrorFunc(func() error {
Expand Down
1 change: 1 addition & 0 deletions nomad/job_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ func TestJobEndpoint_Register_NonOverlapping(t *testing.T) {
WriteRequest: structs.WriteRequest{
Region: "global",
Namespace: job.Namespace,
AuthToken: node.SecretID,
},
}

Expand Down
14 changes: 10 additions & 4 deletions nomad/node_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,8 @@ func (n *Node) deregister(args *structs.NodeBatchDeregisterRequest,
// │ │
// └──── ready ─────┘
func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *structs.NodeUpdateResponse) error {
// UpdateStatus receives requests from client and servers that mark failed
// heartbeats, so we can't use AuthenticateClientOnly
authErr := n.srv.Authenticate(n.ctx, args)

isForwarded := args.IsForwarded()
Expand All @@ -560,6 +562,12 @@ func (n *Node) UpdateStatus(args *structs.NodeUpdateStatusRequest, reply *struct

defer metrics.MeasureSince([]string{"nomad", "client", "update_status"}, time.Now())

if aclObj, err := n.srv.ResolveACL(args); err != nil {
return structs.ErrPermissionDenied
} else if !(aclObj.AllowClientOp() || aclObj.AllowServerOp()) {
return structs.ErrPermissionDenied
}

// Verify the arguments
if args.NodeID == "" {
return fmt.Errorf("missing node ID for client status update")
Expand Down Expand Up @@ -1296,8 +1304,7 @@ func (n *Node) GetClientAllocs(args *structs.NodeSpecificRequest,
// - The node status is down or disconnected. Clients must call the
// UpdateStatus method to update its status in the server.
func (n *Node) UpdateAlloc(args *structs.AllocUpdateRequest, reply *structs.GenericResponse) error {
// COMPAT(1.9.0): move to AuthenticateClientOnly
aclObj, err := n.srv.AuthenticateClientOnlyLegacy(n.ctx, args)
aclObj, err := n.srv.AuthenticateClientOnly(n.ctx, args)
n.srv.MeasureRPCRate("node", structs.RateMetricWrite, args)
if err != nil {
return structs.ErrPermissionDenied
Expand Down Expand Up @@ -2239,8 +2246,7 @@ func taskUsesConnect(task *structs.Task) bool {
}

func (n *Node) EmitEvents(args *structs.EmitNodeEventsRequest, reply *structs.EmitNodeEventsResponse) error {
// COMPAT(1.9.0): move to AuthenticateClientOnly
aclObj, err := n.srv.AuthenticateClientOnlyLegacy(n.ctx, args)
aclObj, err := n.srv.AuthenticateClientOnly(n.ctx, args)
n.srv.MeasureRPCRate("node", structs.RateMetricWrite, args)
if err != nil {
return structs.ErrPermissionDenied
Expand Down
Loading

0 comments on commit b178243

Please sign in to comment.