Skip to content

Commit

Permalink
fix bug that can lead to peering service deletes impacting the state …
Browse files Browse the repository at this point in the history
…of local services
  • Loading branch information
erichaberkorn committed Mar 8, 2023
1 parent dbaf8bf commit e5172b9
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 31 deletions.
3 changes: 3 additions & 0 deletions .changelog/16570.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
peering: Fixes a bug that can lead to peering service deletes impacting the state of local services
```
31 changes: 18 additions & 13 deletions agent/consul/state/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -1186,7 +1186,7 @@ func serviceListTxn(tx ReadTxn, ws memdb.WatchSet, entMeta *acl.EnterpriseMeta,
unique := make(map[structs.ServiceName]struct{})
for service := services.Next(); service != nil; service = services.Next() {
svc := service.(*structs.ServiceNode)
unique[svc.CompoundServiceName()] = struct{}{}
unique[svc.CompoundServiceName().ServiceName] = struct{}{}
}

results := make(structs.ServiceList, 0, len(unique))
Expand Down Expand Up @@ -1920,17 +1920,17 @@ func (s *Store) deleteServiceTxn(tx WriteTxn, idx uint64, nodeName, serviceID st
return fmt.Errorf("failed updating service-kind indexes: %w", err)
}
// Update the node indexes as the service information is included in node catalog queries.
if err := catalogUpdateNodesIndexes(tx, idx, entMeta, peerName); err != nil {
if err := catalogUpdateNodesIndexes(tx, idx, entMeta, svc.PeerName); err != nil {
return fmt.Errorf("failed updating nodes indexes: %w", err)
}
if err := catalogUpdateNodeIndexes(tx, idx, nodeName, entMeta, peerName); err != nil {
if err := catalogUpdateNodeIndexes(tx, idx, nodeName, entMeta, svc.PeerName); err != nil {
return fmt.Errorf("failed updating node indexes: %w", err)
}

name := svc.CompoundServiceName()
psn := svc.CompoundServiceName()

if err := cleanupMeshTopology(tx, idx, svc); err != nil {
return fmt.Errorf("failed to clean up mesh-topology associations for %q: %v", name.String(), err)
return fmt.Errorf("failed to clean up mesh-topology associations for %q: %v", psn.String(), err)
}

q := Query{
Expand All @@ -1957,12 +1957,14 @@ func (s *Store) deleteServiceTxn(tx WriteTxn, idx uint64, nodeName, serviceID st
if err := catalogUpdateServiceExtinctionIndex(tx, idx, entMeta, svc.PeerName); err != nil {
return err
}
psn := structs.PeeredServiceName{Peer: svc.PeerName, ServiceName: name}
if err := freeServiceVirtualIP(tx, idx, psn, nil); err != nil {
return fmt.Errorf("failed to clean up virtual IP for %q: %v", name.String(), err)
return fmt.Errorf("failed to clean up virtual IP for %q: %v", psn.String(), err)
}
if err := cleanupKindServiceName(tx, idx, svc.CompoundServiceName(), svc.ServiceKind); err != nil {
return fmt.Errorf("failed to persist service name: %v", err)

if svc.PeerName == "" {
if err := cleanupKindServiceName(tx, idx, psn.ServiceName, svc.ServiceKind); err != nil {
return fmt.Errorf("failed to persist service name: %v", err)
}
}
}
} else {
Expand Down Expand Up @@ -1990,7 +1992,7 @@ func (s *Store) deleteServiceTxn(tx WriteTxn, idx uint64, nodeName, serviceID st
if svc.PeerName == "" {
sn := structs.ServiceName{Name: svc.ServiceName, EnterpriseMeta: svc.EnterpriseMeta}
if err := cleanupGatewayWildcards(tx, idx, sn, false); err != nil {
return fmt.Errorf("failed to clean up gateway-service associations for %q: %v", name.String(), err)
return fmt.Errorf("failed to clean up gateway-service associations for %q: %v", psn.String(), err)
}
}

Expand Down Expand Up @@ -2695,7 +2697,7 @@ func (s *Store) CheckIngressServiceNodes(ws memdb.WatchSet, serviceName string,
// De-dup services to lookup
names := make(map[structs.ServiceName]struct{})
for _, n := range nodes {
names[n.CompoundServiceName()] = struct{}{}
names[n.CompoundServiceName().ServiceName] = struct{}{}
}

var results structs.CheckServiceNodes
Expand Down Expand Up @@ -3657,7 +3659,7 @@ func updateGatewayNamespace(tx WriteTxn, idx uint64, service *structs.GatewaySer
continue
}

existing, err := tx.First(tableGatewayServices, indexID, service.Gateway, sn.CompoundServiceName(), service.Port)
existing, err := tx.First(tableGatewayServices, indexID, service.Gateway, sn.CompoundServiceName().ServiceName, service.Port)
if err != nil {
return fmt.Errorf("gateway service lookup failed: %s", err)
}
Expand Down Expand Up @@ -4611,7 +4613,10 @@ func updateMeshTopology(tx WriteTxn, idx uint64, node string, svc *structs.NodeS
// cleanupMeshTopology removes a service from the mesh topology table
// This is only safe to call when there are no more known instances of this proxy
func cleanupMeshTopology(tx WriteTxn, idx uint64, service *structs.ServiceNode) error {
// TODO(peering): make this peering aware?
if service.PeerName != "" {
return nil
}

if service.ServiceKind != structs.ServiceKindConnectProxy {
return nil
}
Expand Down
51 changes: 40 additions & 11 deletions agent/consul/state/catalog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2577,20 +2577,49 @@ func TestStateStore_DeleteService(t *testing.T) {
testRegisterService(t, s, 2, "node1", "service1")
testRegisterCheck(t, s, 3, "node1", "service1", "check1", api.HealthPassing)

// Delete the service.
// register a node with a service on a cluster peer.
testRegisterNodeOpts(t, s, 4, "node1", func(n *structs.Node) error {
n.PeerName = "cluster-01"
return nil
})
testRegisterServiceOpts(t, s, 5, "node1", "service1", func(service *structs.NodeService) {
service.PeerName = "cluster-01"
})

wsPeer := memdb.NewWatchSet()
_, ns, err := s.NodeServices(wsPeer, "node1", nil, "cluster-01")
require.Len(t, ns.Services, 1)
require.NoError(t, err)

ws := memdb.NewWatchSet()
_, _, err := s.NodeServices(ws, "node1", nil, "")
_, ns, err = s.NodeServices(ws, "node1", nil, "")
require.Len(t, ns.Services, 1)
require.NoError(t, err)
if err := s.DeleteService(4, "node1", "service1", nil, ""); err != nil {
t.Fatalf("err: %s", err)

{
// Delete the peered service.
err = s.DeleteService(6, "node1", "service1", nil, "cluster-01")
require.NoError(t, err)
require.True(t, watchFired(wsPeer))
_, kindServiceNames, err := s.ServiceNamesOfKind(nil, structs.ServiceKindTypical)
require.NoError(t, err)
require.Len(t, kindServiceNames, 1)
require.Equal(t, "service1", kindServiceNames[0].Service.Name)
}
if !watchFired(ws) {
t.Fatalf("bad")

{
// Delete the service.
err = s.DeleteService(6, "node1", "service1", nil, "")
require.NoError(t, err)
require.True(t, watchFired(ws))
_, kindServiceNames, err := s.ServiceNamesOfKind(nil, structs.ServiceKindTypical)
require.NoError(t, err)
require.Len(t, kindServiceNames, 0)
}

// Service doesn't exist.
ws = memdb.NewWatchSet()
_, ns, err := s.NodeServices(ws, "node1", nil, "")
_, ns, err = s.NodeServices(ws, "node1", nil, "")
if err != nil || ns == nil || len(ns.Services) != 0 {
t.Fatalf("bad: %#v (err: %#v)", ns, err)
}
Expand All @@ -2605,15 +2634,15 @@ func TestStateStore_DeleteService(t *testing.T) {
}

// Index tables were updated.
assert.Equal(t, uint64(4), catalogChecksMaxIndex(tx, nil, ""))
assert.Equal(t, uint64(4), catalogServicesMaxIndex(tx, nil, ""))
assert.Equal(t, uint64(6), catalogChecksMaxIndex(tx, nil, ""))
assert.Equal(t, uint64(6), catalogServicesMaxIndex(tx, nil, ""))

// Deleting a nonexistent service should be idempotent and not return an
// error, nor fire a watch.
if err := s.DeleteService(5, "node1", "service1", nil, ""); err != nil {
if err := s.DeleteService(6, "node1", "service1", nil, ""); err != nil {
t.Fatalf("err: %s", err)
}
assert.Equal(t, uint64(4), catalogServicesMaxIndex(tx, nil, ""))
assert.Equal(t, uint64(6), catalogServicesMaxIndex(tx, nil, ""))
if watchFired(ws) {
t.Fatalf("bad")
}
Expand Down
6 changes: 3 additions & 3 deletions agent/consul/state/usage.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,11 @@ func updateUsage(tx WriteTxn, changes Changes) error {
// changed, in order to compare it with the finished memdb state.
// Make sure to account for the fact that services can change their names.
if serviceNameChanged(change) {
serviceNameChanges[svc.CompoundServiceName()] += 1
serviceNameChanges[svc.CompoundServiceName().ServiceName] += 1
before := change.Before.(*structs.ServiceNode)
serviceNameChanges[before.CompoundServiceName()] -= 1
serviceNameChanges[before.CompoundServiceName().ServiceName] -= 1
} else {
serviceNameChanges[svc.CompoundServiceName()] += delta
serviceNameChanges[svc.CompoundServiceName().ServiceName] += delta
}

case "kvs":
Expand Down
12 changes: 8 additions & 4 deletions agent/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1153,7 +1153,7 @@ func (sn *ServiceNode) CompoundServiceID() ServiceID {
}
}

func (sn *ServiceNode) CompoundServiceName() ServiceName {
func (sn *ServiceNode) CompoundServiceName() PeeredServiceName {
name := sn.ServiceName
if name == "" {
name = sn.ServiceID
Expand All @@ -1163,10 +1163,14 @@ func (sn *ServiceNode) CompoundServiceName() ServiceName {
entMeta := sn.EnterpriseMeta
entMeta.Normalize()

return ServiceName{
Name: name,
EnterpriseMeta: entMeta,
return PeeredServiceName{
ServiceName: ServiceName{
Name: name,
EnterpriseMeta: entMeta,
},
Peer: sn.PeerName,
}

}

// Weights represent the weight used by DNS for a given status
Expand Down

0 comments on commit e5172b9

Please sign in to comment.