From 451c47fe538b15601c01e96b9592b31fa4a1afad Mon Sep 17 00:00:00 2001 From: David Cheung Date: Tue, 28 Feb 2023 20:40:56 +0000 Subject: [PATCH] Revert "Add metrics for sync result" Revert #1911 --- pkg/neg/controller.go | 1 - pkg/neg/metrics/neg_metrics_collector.go | 22 ------- pkg/neg/syncers/transaction.go | 81 ++++++++++-------------- pkg/neg/syncers/transaction_test.go | 60 +++++++----------- pkg/neg/syncers/utils.go | 13 +++- 5 files changed, 67 insertions(+), 110 deletions(-) diff --git a/pkg/neg/controller.go b/pkg/neg/controller.go index fb5e52b8b7..a67fbfef31 100644 --- a/pkg/neg/controller.go +++ b/pkg/neg/controller.go @@ -318,7 +318,6 @@ func (c *Controller) Run(stopCh <-chan struct{}) { wait.Until(c.gc, c.gcPeriod, stopCh) }() go c.reflector.Run(stopCh) - go c.syncerMetrics.Run(stopCh) <-stopCh } diff --git a/pkg/neg/metrics/neg_metrics_collector.go b/pkg/neg/metrics/neg_metrics_collector.go index 15badc03fc..b8b8f81dd1 100644 --- a/pkg/neg/metrics/neg_metrics_collector.go +++ b/pkg/neg/metrics/neg_metrics_collector.go @@ -20,27 +20,11 @@ import ( "sync" "time" - "github.com/prometheus/client_golang/prometheus" "k8s.io/apimachinery/pkg/util/wait" negtypes "k8s.io/ingress-gce/pkg/neg/types" "k8s.io/klog/v2" ) -var ( - syncResultLabel = "result" - syncResultKey = "sync_result" - - // syncerSyncResult tracks the count for each sync result - syncerSyncResult = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Subsystem: negControllerSubsystem, - Name: syncResultKey, - Help: "Current count for each sync result", - }, - []string{syncResultLabel}, - ) -) - type SyncerMetricsCollector interface { UpdateSyncer(key negtypes.NegSyncerKey, result *negtypes.NegSyncResult) SetSyncerEPMetrics(key negtypes.NegSyncerKey, epState *negtypes.SyncerEPStat) @@ -79,7 +63,6 @@ func FakeSyncerMetrics() *SyncerMetrics { // RegisterSyncerMetrics registers syncer related metrics func RegisterSyncerMetrics() { - prometheus.MustRegister(syncerSyncResult) } func (sm *SyncerMetrics) Run(stopCh <-chan struct{}) { @@ -98,11 +81,6 @@ func (sm *SyncerMetrics) export() { // UpdateSyncer update the status of corresponding syncer based on the syncResult. func (sm *SyncerMetrics) UpdateSyncer(key negtypes.NegSyncerKey, syncResult *negtypes.NegSyncResult) { - if syncResult.Result == negtypes.ResultInProgress { - return - } - syncerSyncResult.WithLabelValues(syncResult.Result).Inc() - sm.mu.Lock() defer sm.mu.Unlock() if sm.syncerStatusMap == nil { diff --git a/pkg/neg/syncers/transaction.go b/pkg/neg/syncers/transaction.go index 6b96e53e86..0140732ee2 100644 --- a/pkg/neg/syncers/transaction.go +++ b/pkg/neg/syncers/transaction.go @@ -186,17 +186,14 @@ func (s *transactionSyncer) syncInternal() error { defer s.syncLock.Unlock() start := time.Now() - result := s.syncInternalImpl() + err := s.syncInternalImpl() - s.updateStatus(result.Error) - metrics.PublishNegSyncMetrics(string(s.NegSyncerKey.NegType), string(s.endpointsCalculator.Mode()), result.Error, start) - if result.Error != nil { - s.syncCollector.UpdateSyncer(s.NegSyncerKey, result) - } - return result.Error + s.updateStatus(err) + metrics.PublishNegSyncMetrics(string(s.NegSyncerKey.NegType), string(s.endpointsCalculator.Mode()), err, start) + return err } -func (s *transactionSyncer) syncInternalImpl() *negtypes.NegSyncResult { +func (s *transactionSyncer) syncInternalImpl() error { // TODO(cheungdavid): for now we reset the boolean so it is a no-op, but // in the future, it will be used to trigger degraded mode if the syncer is in error state. if s.inErrorState() { @@ -205,20 +202,20 @@ func (s *transactionSyncer) syncInternalImpl() *negtypes.NegSyncResult { if s.needInit || s.isZoneChange() { if err := s.ensureNetworkEndpointGroups(); err != nil { - return negtypes.NewNegSyncResult(err, negtypes.ResultNegNotFound) + return err } s.needInit = false } if s.syncer.IsStopped() || s.syncer.IsShuttingDown() { s.logger.V(3).Info("Skip syncing NEG", "negSyncerKey", s.NegSyncerKey.String()) - return negtypes.NewNegSyncResult(nil, negtypes.ResultSuccess) + return nil } s.logger.V(2).Info("Sync NEG", "negSyncerKey", s.NegSyncerKey.String(), "endpointsCalculatorMode", s.endpointsCalculator.Mode()) currentMap, err := retrieveExistingZoneNetworkEndpointMap(s.NegSyncerKey.NegName, s.zoneGetter, s.cloud, s.NegSyncerKey.GetAPIVersion(), s.endpointsCalculator.Mode()) if err != nil { - return negtypes.NewNegSyncResult(err, negtypes.ResultCurrentEPNotFound) + return err } s.logStats(currentMap, "current NEG endpoints") @@ -233,11 +230,11 @@ func (s *transactionSyncer) syncInternalImpl() *negtypes.NegSyncResult { slices, err := s.endpointSliceLister.ByIndex(endpointslices.EndpointSlicesByServiceIndex, endpointslices.FormatEndpointSlicesServiceKey(s.Namespace, s.Name)) if err != nil { - return negtypes.NewNegSyncResult(err, negtypes.ResultEPSNotFound) + return err } if len(slices) < 1 { s.logger.Error(nil, "Endpoint slices for the service doesn't exist. Skipping NEG sync") - return negtypes.NewNegSyncResult(nil, negtypes.ResultSuccess) + return nil } endpointSlices := make([]*discovery.EndpointSlice, len(slices)) negCR, err := getNegFromStore(s.svcNegLister, s.Namespace, s.NegSyncerKey.NegName) @@ -262,16 +259,14 @@ func (s *transactionSyncer) syncInternalImpl() *negtypes.NegSyncResult { } endpointsData := negtypes.EndpointsDataFromEndpointSlices(endpointSlices) targetMap, endpointPodMap, dupCount, err = s.endpointsCalculator.CalculateEndpoints(endpointsData, currentMap) - if valid, result := s.CheckEPField(err); !valid { - s.setErrorState(result.Result) - return result + if valid, reason := s.isValidEPField(err); !valid { + s.setErrorState(reason) } - if valid, result := s.CheckEndpointInfo(endpointsData, endpointPodMap, dupCount); !valid { - s.setErrorState(result.Result) - return result + if valid, reason := s.isValidEndpointInfo(endpointsData, endpointPodMap, dupCount); !valid { + s.setErrorState(reason) } if err != nil { - return negtypes.NewNegSyncResult(fmt.Errorf("endpoints calculation error in mode %q, err: %w", s.endpointsCalculator.Mode(), err), negtypes.ResultOtherError) + return fmt.Errorf("endpoints calculation error in mode %q, err: %w", s.endpointsCalculator.Mode(), err) } s.logStats(targetMap, "desired NEG endpoints") @@ -295,7 +290,7 @@ func (s *transactionSyncer) syncInternalImpl() *negtypes.NegSyncResult { if len(addEndpoints) == 0 && len(removeEndpoints) == 0 { s.logger.V(3).Info("No endpoint change. Skip syncing NEG. ", s.Namespace, s.Name) - return negtypes.NewNegSyncResult(nil, negtypes.ResultSuccess) + return nil } s.logEndpoints(addEndpoints, "adding endpoint") s.logEndpoints(removeEndpoints, "removing endpoint") @@ -359,19 +354,19 @@ func (s *transactionSyncer) ensureNetworkEndpointGroups() error { return utilerrors.NewAggregate(errList) } -// CheckEndpointInfo checks if endpoint information is correct. -// It returns false with the corresponding reason if one of the two checks fails: +// isValidEndpointInfo checks if endpoint information is correct. +// It returns false and the corresponding reason if one of the two checks fails: // // 1. The endpoint count from endpointData doesn't equal to the one from endpointPodMap: // endpiontPodMap removes the duplicated endpoints, and dupCount stores the number of duplicated it removed // and we compare the endpoint counts with duplicates // 2. The endpoint count from endpointData or the one from endpointPodMap is 0 -func (s *transactionSyncer) CheckEndpointInfo(eds []negtypes.EndpointsData, endpointPodMap negtypes.EndpointPodMap, dupCount int) (bool, *negtypes.NegSyncResult) { +func (s *transactionSyncer) isValidEndpointInfo(eds []negtypes.EndpointsData, endpointPodMap negtypes.EndpointPodMap, dupCount int) (bool, string) { // Endpoint count from EndpointPodMap countFromPodMap := len(endpointPodMap) + dupCount if countFromPodMap == 0 { s.logger.Info("Detected endpoint count from endpointPodMap going to zero", "endpointPodMap", endpointPodMap) - return false, negtypes.NewNegSyncResult(negtypes.ErrEPCalculationCountZero, negtypes.ResultEPCalculationCountZero) + return false, negtypes.ResultEPCalculationCountZero } // Endpoint count from EndpointData @@ -381,35 +376,27 @@ func (s *transactionSyncer) CheckEndpointInfo(eds []negtypes.EndpointsData, endp } if countFromEndpointData == 0 { s.logger.Info("Detected endpoint count from endpointData going to zero", "endpointData", eds) - return false, negtypes.NewNegSyncResult(negtypes.ErrEPSEndpointCountZero, negtypes.ResultEPSEndpointCountZero) + return false, negtypes.ResultEPSEndpointCountZero } if countFromEndpointData != countFromPodMap { s.logger.Info("Detected error when comparing endpoint counts", "endpointData", eds, "endpointPodMap", endpointPodMap, "dupCount", dupCount) - return false, negtypes.NewNegSyncResult(negtypes.ErrEPCountsDiffer, negtypes.ResultEPCountsDiffer) + return false, negtypes.ResultEPCountsDiffer } - return true, negtypes.NewNegSyncResult(nil, negtypes.ResultSuccess) + return true, negtypes.ResultSuccess } -// CheckEPField checks if endpoints have valid field -// It return the result boolean with the corresponding reason -func (s *transactionSyncer) CheckEPField(err error) (bool, *negtypes.NegSyncResult) { - if errors.Is(err, negtypes.ErrEPMissingNodeName) { +// isValidEPField returns false and the corresponding reason if there is endpoint with missing zone or nodeName +func (s *transactionSyncer) isValidEPField(err error) (bool, string) { + if errors.Is(err, ErrEPMissingNodeName) { s.logger.Info("Detected unexpected error when checking missing nodeName", "error", err) - return false, negtypes.NewNegSyncResult(err, negtypes.ResultEPMissingNodeName) + return false, negtypes.ResultEPMissingNodeName } - if errors.Is(err, negtypes.ErrEPMissingZone) { + if errors.Is(err, ErrEPMissingZone) { s.logger.Info("Detected unexpected error when checking missing zone", "error", err) - return false, negtypes.NewNegSyncResult(err, negtypes.ResultEPMissingZone) - } - if errors.Is(err, negtypes.ErrNodeNotFound) { - return true, negtypes.NewNegSyncResult(err, negtypes.ResultNodeNotFound) - } - if err != nil { - return true, negtypes.NewNegSyncResult(nil, negtypes.ResultOtherError) - } else { - return true, negtypes.NewNegSyncResult(nil, negtypes.ResultSuccess) + return false, negtypes.ResultEPMissingZone } + return true, negtypes.ResultSuccess } // isValidEPBatch returns false and the corresponding reason if the error from endpoint batch response is due to bad request @@ -433,7 +420,7 @@ func (s *transactionSyncer) isValidEPBatch(err error, operation transactionOp, n } // syncNetworkEndpoints spins off go routines to execute NEG operations -func (s *transactionSyncer) syncNetworkEndpoints(addEndpoints, removeEndpoints map[string]negtypes.NetworkEndpointSet) *negtypes.NegSyncResult { +func (s *transactionSyncer) syncNetworkEndpoints(addEndpoints, removeEndpoints map[string]negtypes.NetworkEndpointSet) error { var wg sync.WaitGroup syncFunc := func(endpointMap map[string]negtypes.NetworkEndpointSet, operation transactionOp) error { @@ -469,14 +456,14 @@ func (s *transactionSyncer) syncNetworkEndpoints(addEndpoints, removeEndpoints m } if err := syncFunc(addEndpoints, attachOp); err != nil { - return negtypes.NewNegSyncResult(err, negtypes.ResultOtherError) + return err } if err := syncFunc(removeEndpoints, detachOp); err != nil { - return negtypes.NewNegSyncResult(err, negtypes.ResultOtherError) + return err } go s.collectSyncResult(&wg) - return negtypes.NewNegSyncResult(nil, negtypes.ResultInProgress) + return nil } // collectSyncResult collects the result of the sync and emits the metrics for sync result diff --git a/pkg/neg/syncers/transaction_test.go b/pkg/neg/syncers/transaction_test.go index 64e946cf96..b146e8cacd 100644 --- a/pkg/neg/syncers/transaction_test.go +++ b/pkg/neg/syncers/transaction_test.go @@ -205,9 +205,9 @@ func TestTransactionSyncNetworkEndpoints(t *testing.T) { } for _, tc := range testCases { - syncResult := transactionSyncer.syncNetworkEndpoints(tc.addEndpoints, tc.removeEndpoints) - if syncResult.Error != nil { - t.Errorf("For case %q, syncNetworkEndpoints() got %v, want nil", tc.desc, syncResult.Error) + err := transactionSyncer.syncNetworkEndpoints(tc.addEndpoints, tc.removeEndpoints) + if err != nil { + t.Errorf("For case %q, syncNetworkEndpoints() got %v, want nil", tc.desc, err) } if err := waitForTransactions(transactionSyncer); err != nil { @@ -1392,7 +1392,7 @@ func TestUnknownNodes(t *testing.T) { } } -func TestCheckEndpointInfo(t *testing.T) { +func TestIsValidEndpointInfo(t *testing.T) { t.Parallel() _, transactionSyncer := newTestTransactionSyncer(negtypes.NewAdapter(gce.NewFakeGCECloud(gce.DefaultTestClusterValues())), negtypes.VmIpPortEndpointType, false) @@ -1453,7 +1453,7 @@ func TestCheckEndpointInfo(t *testing.T) { endpointPodMap map[negtypes.NetworkEndpoint]types.NamespacedName dupCount int expect bool - expectedResult *negtypes.NegSyncResult + expectedReason string }{ { desc: "counts equal, endpointData has no duplicated endpoints", @@ -1526,7 +1526,7 @@ func TestCheckEndpointInfo(t *testing.T) { endpointPodMap: testEndpointPodMap, dupCount: 0, expect: true, - expectedResult: negtypes.NewNegSyncResult(nil, negtypes.ResultSuccess), + expectedReason: "", }, { desc: "counts equal, endpointData has duplicated endpoints", @@ -1608,7 +1608,7 @@ func TestCheckEndpointInfo(t *testing.T) { endpointPodMap: testEndpointPodMap, dupCount: 1, expect: true, - expectedResult: negtypes.NewNegSyncResult(nil, negtypes.ResultSuccess), + expectedReason: "", }, { desc: "counts not equal, endpointData has no duplicated endpoints", @@ -1672,7 +1672,7 @@ func TestCheckEndpointInfo(t *testing.T) { endpointPodMap: testEndpointPodMap, dupCount: 0, expect: false, - expectedResult: negtypes.NewNegSyncResult(negtypes.ErrEPCountsDiffer, negtypes.ResultEPCountsDiffer), + expectedReason: negtypes.ResultEPCountsDiffer, }, { desc: "counts not equal, endpointData has duplicated endpoints", @@ -1745,7 +1745,7 @@ func TestCheckEndpointInfo(t *testing.T) { endpointPodMap: testEndpointPodMap, dupCount: 1, expect: false, - expectedResult: negtypes.NewNegSyncResult(negtypes.ErrEPCountsDiffer, negtypes.ResultEPCountsDiffer), + expectedReason: negtypes.ResultEPCountsDiffer, }, { desc: "endpointData has zero endpoint", @@ -1780,7 +1780,7 @@ func TestCheckEndpointInfo(t *testing.T) { endpointPodMap: testEndpointPodMap, dupCount: 0, expect: false, - expectedResult: negtypes.NewNegSyncResult(negtypes.ErrEPSEndpointCountZero, negtypes.ResultEPSEndpointCountZero), + expectedReason: negtypes.ResultEPSEndpointCountZero, }, { desc: "endpointPodMap has zero endpoint", @@ -1853,7 +1853,7 @@ func TestCheckEndpointInfo(t *testing.T) { endpointPodMap: map[negtypes.NetworkEndpoint]types.NamespacedName{}, dupCount: 0, expect: false, - expectedResult: negtypes.NewNegSyncResult(negtypes.ErrEPCalculationCountZero, negtypes.ResultEPCalculationCountZero), + expectedReason: negtypes.ResultEPCalculationCountZero, }, { desc: "endpointData and endpointPodMap both have zero endpoint", @@ -1888,7 +1888,7 @@ func TestCheckEndpointInfo(t *testing.T) { endpointPodMap: map[negtypes.NetworkEndpoint]types.NamespacedName{}, dupCount: 0, expect: false, - expectedResult: negtypes.NewNegSyncResult(negtypes.ErrEPCalculationCountZero, negtypes.ResultEPCalculationCountZero), // PodMap count is check and returned first, + expectedReason: negtypes.ResultEPCalculationCountZero, // PodMap count is check and returned first, }, { desc: "endpointData and endpointPodMap both have non-zero endpoints", @@ -1961,27 +1961,20 @@ func TestCheckEndpointInfo(t *testing.T) { endpointPodMap: testEndpointPodMap, dupCount: 0, expect: true, - expectedResult: negtypes.NewNegSyncResult(nil, negtypes.ResultSuccess), + expectedReason: negtypes.ResultSuccess, }, } for _, tc := range testCases { t.Run(tc.desc, func(t *testing.T) { - got, result := transactionSyncer.CheckEndpointInfo(tc.endpointsData, tc.endpointPodMap, tc.dupCount) - if got != tc.expect { - t.Errorf("CheckEndpointInfo() got valid = %t, expected %t", got, tc.expect) - } - if result.Result != tc.expectedResult.Result { - t.Errorf("CheckEndpointInfo() got result = %s, expected %s", result.Result, tc.expectedResult.Result) - } - if !errors.Is(result.Error, tc.expectedResult.Error) { - t.Errorf("CheckEndpointInfo() got error = %t, expected %t", result.Error, tc.expectedResult.Error) + if got, reason := transactionSyncer.isValidEndpointInfo(tc.endpointsData, tc.endpointPodMap, tc.dupCount); got != tc.expect && reason != tc.expectedReason { + t.Errorf("invalidEndpointInfo() = %t, expected %t", got, tc.expect) } }) } } -func TestCheckEPField(t *testing.T) { +func TestIsValidEPField(t *testing.T) { t.Parallel() _, transactionSyncer := newTestTransactionSyncer(negtypes.NewAdapter(gce.NewFakeGCECloud(gce.DefaultTestClusterValues())), negtypes.VmIpPortEndpointType, false) @@ -2011,7 +2004,7 @@ func TestCheckEPField(t *testing.T) { desc string endpointsData []negtypes.EndpointsData expect bool - expectedResult *negtypes.NegSyncResult + expectedReason string }{ { desc: "no missing zone or nodeName", @@ -2082,7 +2075,7 @@ func TestCheckEPField(t *testing.T) { }, }, expect: true, - expectedResult: negtypes.NewNegSyncResult(nil, negtypes.ResultSuccess), + expectedReason: negtypes.ResultSuccess, }, { desc: "contains one missing nodeName", @@ -2153,7 +2146,7 @@ func TestCheckEPField(t *testing.T) { }, }, expect: false, - expectedResult: negtypes.NewNegSyncResult(negtypes.ErrEPMissingNodeName, negtypes.ResultEPMissingNodeName), + expectedReason: negtypes.ResultEPMissingNodeName, }, { desc: "contains one empty nodeName", @@ -2224,7 +2217,7 @@ func TestCheckEPField(t *testing.T) { }, }, expect: false, - expectedResult: negtypes.NewNegSyncResult(negtypes.ErrEPMissingNodeName, negtypes.ResultEPMissingNodeName), + expectedReason: negtypes.ResultEPMissingNodeName, }, { desc: "contains one missing zone", @@ -2295,21 +2288,14 @@ func TestCheckEPField(t *testing.T) { }, }, expect: false, - expectedResult: negtypes.NewNegSyncResult(negtypes.ErrEPMissingZone, negtypes.ResultEPMissingZone), + expectedReason: negtypes.ResultEPMissingZone, }, } for _, tc := range testCases { t.Run(tc.desc, func(t *testing.T) { _, _, _, err := transactionSyncer.endpointsCalculator.CalculateEndpoints(tc.endpointsData, nil) - got, result := transactionSyncer.CheckEPField(err) - if got != tc.expect { - t.Errorf("CheckEPField() got valid = %t, expected %t", got, tc.expect) - } - if result.Result != tc.expectedResult.Result { - t.Errorf("CheckEPField() got result = %s, expected %s", result.Result, tc.expectedResult.Result) - } - if !errors.Is(result.Error, tc.expectedResult.Error) { - t.Errorf("CheckEPField() got error = %t, expected %t", result.Error, tc.expectedResult.Error) + if got, reason := transactionSyncer.isValidEPField(err); got != tc.expect && reason != tc.expectedReason { + t.Errorf("isValidEPField() = %t, expected %t, err: %v, ", got, tc.expect, err) } }) } diff --git a/pkg/neg/syncers/utils.go b/pkg/neg/syncers/utils.go index 16d198e7e0..6c17e46f09 100644 --- a/pkg/neg/syncers/utils.go +++ b/pkg/neg/syncers/utils.go @@ -17,6 +17,7 @@ limitations under the License. package syncers import ( + "errors" "fmt" "strconv" "strings" @@ -45,6 +46,12 @@ const ( separator = "||" ) +var ( + ErrEPMissingNodeName = errors.New("endpoint has empty nodeName field") + ErrNodeNotFound = errors.New("failed to retrieve associated zone of node") + ErrEPMissingZone = errors.New("endpoint has empty zone field") +) + // encodeEndpoint encodes ip and instance into a single string func encodeEndpoint(ip, instance, port string) string { return strings.Join([]string{ip, instance, port}, separator) @@ -246,7 +253,7 @@ func toZoneNetworkEndpointMap(eds []negtypes.EndpointsData, zoneGetter negtypes. for _, endpointAddress := range ed.Addresses { if endpointAddress.NodeName == nil || len(*endpointAddress.NodeName) == 0 { klog.V(2).Infof("Endpoint %q in Endpoints %s/%s does not have an associated node. Skipping", endpointAddress.Addresses, ed.Meta.Namespace, ed.Meta.Name) - return nil, nil, dupCount, negtypes.ErrEPMissingNodeName + return nil, nil, dupCount, ErrEPMissingNodeName } if endpointAddress.TargetRef == nil { klog.V(2).Infof("Endpoint %q in Endpoints %s/%s does not have an associated pod. Skipping", endpointAddress.Addresses, ed.Meta.Namespace, ed.Meta.Name) @@ -254,10 +261,10 @@ func toZoneNetworkEndpointMap(eds []negtypes.EndpointsData, zoneGetter negtypes. } zone, err := zoneGetter.GetZoneForNode(*endpointAddress.NodeName) if err != nil { - return nil, nil, dupCount, negtypes.ErrNodeNotFound + return nil, nil, dupCount, ErrNodeNotFound } if zone == "" { - return nil, nil, dupCount, negtypes.ErrEPMissingZone + return nil, nil, dupCount, ErrEPMissingZone } if zoneNetworkEndpointMap[zone] == nil { zoneNetworkEndpointMap[zone] = negtypes.NewNetworkEndpointSet()