From 31e0519f30565f2d70fd3eef6436a3dd0f67862c Mon Sep 17 00:00:00 2001 From: David Cheung Date: Wed, 23 Nov 2022 08:05:28 +0000 Subject: [PATCH] Add check for invalid or missing data in EPS Check if EndpointSlice has missing or invalid data zone/nodeName. If so, the syncer will enter the error state. --- pkg/neg/syncers/transaction.go | 22 ++- pkg/neg/syncers/transaction_test.go | 218 +++++++++++++++++++++++++++- 2 files changed, 237 insertions(+), 3 deletions(-) diff --git a/pkg/neg/syncers/transaction.go b/pkg/neg/syncers/transaction.go index d5058ff6e7..9b81b9576e 100644 --- a/pkg/neg/syncers/transaction.go +++ b/pkg/neg/syncers/transaction.go @@ -242,7 +242,7 @@ func (s *transactionSyncer) syncInternalImpl() error { } endpointsData := negtypes.EndpointsDataFromEndpointSlices(endpointSlices) targetMap, endpointPodMap, dupCount, err = s.endpointsCalculator.CalculateEndpoints(endpointsData, currentMap) - if s.invalidEndpointInfo(endpointsData, endpointPodMap, dupCount) { + if s.invalidEndpointInfo(endpointsData, endpointPodMap, dupCount) || s.isZoneMissing(targetMap) { s.setErrorState() } if err != nil { @@ -362,6 +362,7 @@ func (s *transactionSyncer) ensureNetworkEndpointGroups() error { // 1. The endpoint count from endpointData doesn't equal to the one from endpointPodMap: // endpiontPodMap removes the duplicated endpoints, and dupCount stores the number of duplicated it removed // and we compare the endpoint counts with duplicates +// 2. There is at least one endpoint in endpointData with missing nodeName func (s *transactionSyncer) invalidEndpointInfo(eds []negtypes.EndpointsData, endpointPodMap negtypes.EndpointPodMap, dupCount int) bool { // Endpoint count from EndpointPodMap countFromPodMap := len(endpointPodMap) + dupCount @@ -370,9 +371,26 @@ func (s *transactionSyncer) invalidEndpointInfo(eds []negtypes.EndpointsData, en countFromEndpointData := 0 for _, ed := range eds { countFromEndpointData += len(ed.Addresses) + for _, endpointAddress := range ed.Addresses { + nodeName := endpointAddress.NodeName + if nodeName == nil || len(*nodeName) == 0 { + s.logger.Info("Detected error when checking nodeName", "endpoint", endpointAddress, "endpointData", eds) + return true + } + } } + if countFromEndpointData != countFromPodMap { - s.logger.Info("Detected error when comparing endpoint counts, marking syncer in error state", "endpointData", eds, "endpointPodMap", endpointPodMap, "dupCount", dupCount) + s.logger.Info("Detected error when comparing endpoint counts", "endpointData", eds, "endpointPodMap", endpointPodMap, "dupCount", dupCount) + return true + } + return false +} + +// isZoneMissing returns true if there is invalid(empty) zone in zoneNetworkEndpointMap +func (s *transactionSyncer) isZoneMissing(zoneNetworkEndpointMap map[string]negtypes.NetworkEndpointSet) bool { + if _, isPresent := zoneNetworkEndpointMap[""]; isPresent { + s.logger.Info("Detected error when checking missing zone", "zoneNetworkEndpointMap", zoneNetworkEndpointMap) return true } return false diff --git a/pkg/neg/syncers/transaction_test.go b/pkg/neg/syncers/transaction_test.go index 916ee81737..c30cd2d4da 100644 --- a/pkg/neg/syncers/transaction_test.go +++ b/pkg/neg/syncers/transaction_test.go @@ -1409,7 +1409,7 @@ func TestUnknownNodes(t *testing.T) { } } -func TestInvalidEndpointInfoEndpointCountsDiffer(t *testing.T) { +func TestInvalidEndpointInfo(t *testing.T) { t.Parallel() _, transactionSyncer := newTestTransactionSyncer(negtypes.NewAdapter(gce.NewFakeGCECloud(gce.DefaultTestClusterValues())), negtypes.VmIpPortEndpointType, false, true) @@ -1754,6 +1754,148 @@ func TestInvalidEndpointInfoEndpointCountsDiffer(t *testing.T) { dupCount: 1, expect: true, }, + { + desc: "no missing nodeNames", + endpointsData: []negtypes.EndpointsData{ + { + Meta: &metav1.ObjectMeta{ + Name: testServiceName + "-1", + Namespace: testServiceNamespace, + }, + Ports: []negtypes.PortData{ + { + Name: testPortName, + Port: port80, + }, + }, + Addresses: []negtypes.AddressData{ + { + TargetRef: &corev1.ObjectReference{ + Namespace: testServiceNamespace, + Name: testPodName1, + }, + NodeName: &instance1, + Addresses: []string{testIP1}, + Ready: true, + }, + { + TargetRef: &corev1.ObjectReference{ + Namespace: testServiceNamespace, + Name: testPodName2, + }, + NodeName: &instance1, + Addresses: []string{testIP2}, + Ready: true, + }, + }, + }, + { + Meta: &metav1.ObjectMeta{ + Name: testServiceName + "-2", + Namespace: testServiceNamespace, + }, + Ports: []negtypes.PortData{ + { + Name: testPortName, + Port: port81, + }, + }, + Addresses: []negtypes.AddressData{ + { + TargetRef: &corev1.ObjectReference{ + Namespace: testServiceNamespace, + Name: testPodName3, + }, + NodeName: &instance2, + Addresses: []string{testIP3}, + Ready: true, + }, + { + TargetRef: &corev1.ObjectReference{ + Namespace: testServiceNamespace, + Name: testPodName4, + }, + NodeName: &instance4, + Addresses: []string{testIP4}, + Ready: true, + }, + }, + }, + }, + dupCount: 0, + expect: false, + }, + { + desc: "at least one endpoint is missing a nodeName", + endpointsData: []negtypes.EndpointsData{ + { + Meta: &metav1.ObjectMeta{ + Name: testServiceName + "-1", + Namespace: testServiceNamespace, + }, + Ports: []negtypes.PortData{ + { + Name: testPortName, + Port: port80, + }, + }, + Addresses: []negtypes.AddressData{ + { + TargetRef: &corev1.ObjectReference{ + Namespace: testServiceNamespace, + Name: testPodName1, + }, + NodeName: nil, + Addresses: []string{testIP1}, + Ready: true, + }, + { + TargetRef: &corev1.ObjectReference{ + Namespace: testServiceNamespace, + Name: testPodName2, + }, + NodeName: &instance1, + Addresses: []string{testIP2}, + Ready: true, + }, + }, + }, + { + Meta: &metav1.ObjectMeta{ + Name: testServiceName + "-2", + Namespace: testServiceNamespace, + }, + Ports: []negtypes.PortData{ + { + Name: testPortName, + Port: port81, + }, + }, + Addresses: []negtypes.AddressData{ + { + TargetRef: &corev1.ObjectReference{ + Namespace: testServiceNamespace, + Name: testPodName3, + }, + NodeName: &instance2, + Addresses: []string{testIP3}, + Ready: true, + }, + { + TargetRef: &corev1.ObjectReference{ + Namespace: testServiceNamespace, + Name: testPodName4, + }, + NodeName: &instance4, + Addresses: []string{testIP4}, + Ready: true, + }, + }, + }, + }, + dupCount: 0, + expect: true, + }, } for _, tc := range testCases { @@ -1765,6 +1907,80 @@ func TestInvalidEndpointInfoEndpointCountsDiffer(t *testing.T) { } } +func TestIsZoneMissing(t *testing.T) { + t.Parallel() + _, transactionSyncer := newTestTransactionSyncer(negtypes.NewAdapter(gce.NewFakeGCECloud(gce.DefaultTestClusterValues())), negtypes.VmIpPortEndpointType, false, true) + instance1 := testInstance1 + instance2 := testInstance2 + + zone1 := "us-central1-a" + zone2 := "us-central1-b" + + testCases := []struct { + desc string + zoneNetworkEndpointMap map[string]negtypes.NetworkEndpointSet + expect bool + }{ + { + desc: "no missing zones", + zoneNetworkEndpointMap: map[string]negtypes.NetworkEndpointSet{ + zone1: negtypes.NewNetworkEndpointSet( + negtypes.NetworkEndpoint{ + IP: "10.100.1.2", + Port: "80", + Node: instance1, + }, + negtypes.NetworkEndpoint{ + IP: "10.100.1.3", + Port: "80", + Node: instance1, + }, + ), + zone2: negtypes.NewNetworkEndpointSet( + negtypes.NetworkEndpoint{ + IP: "10.100.2.2", + Port: "81", + Node: instance2, + }, + ), + }, + expect: false, + }, + { + desc: "contains one missing zone", + zoneNetworkEndpointMap: map[string]negtypes.NetworkEndpointSet{ + zone1: negtypes.NewNetworkEndpointSet( + negtypes.NetworkEndpoint{ + IP: "10.100.1.2", + Port: "80", + Node: instance1, + }, + negtypes.NetworkEndpoint{ + IP: "10.100.1.3", + Port: "80", + Node: instance1, + }, + ), + "": negtypes.NewNetworkEndpointSet( + negtypes.NetworkEndpoint{ + IP: "10.100.2.2", + Port: "81", + Node: instance2, + }, + ), + }, + expect: true, + }, + } + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + if got := transactionSyncer.isZoneMissing(tc.zoneNetworkEndpointMap); got != tc.expect { + t.Errorf("isZoneMissing() = %t, expected %t", got, tc.expect) + } + }) + } +} + func newL4ILBTestTransactionSyncer(fakeGCE negtypes.NetworkEndpointGroupCloud, mode negtypes.EndpointsCalculatorMode, enableEndpointSlices bool) (negtypes.NegSyncer, *transactionSyncer) { negsyncer, ts := newTestTransactionSyncer(fakeGCE, negtypes.VmIpEndpointType, false, enableEndpointSlices) ts.endpointsCalculator = GetEndpointsCalculator(ts.nodeLister, ts.podLister, ts.zoneGetter, ts.NegSyncerKey, mode, klog.TODO())