diff --git a/pkg/neg/metrics/metrics.go b/pkg/neg/metrics/metrics.go index 7265060908..b9b183f554 100644 --- a/pkg/neg/metrics/metrics.go +++ b/pkg/neg/metrics/metrics.go @@ -32,6 +32,7 @@ const ( negOpEndpointsKey = "neg_operation_endpoints" lastSyncTimestampKey = "sync_timestamp" syncerStalenessKey = "syncer_staleness" + epsStalenessKey = "endpointslice_staleness" resultSuccess = "success" resultError = "error" @@ -135,6 +136,15 @@ var ( Subsystem: negControllerSubsystem, Name: syncerStalenessKey, Help: "The duration of a syncer since it last syncs", + }, + ) + + // EPSStaleness tracks for every endpoint slice, how long since it was last processed + EPSStaleness = prometheus.NewHistogram( + prometheus.HistogramOpts{ + Subsystem: negControllerSubsystem, + Name: epsStalenessKey, + Help: "The duration for an endpoint slice since it was last processed by syncer", // custom buckets - [1s, 2s, 4s, 8s, 16s, 32s, 64s, 128s, 256s(~4min), 512s(~8min), 1024s(~17min), 2048 (~34min), 4096(~68min), 8192(~136min), +Inf] Buckets: prometheus.ExponentialBuckets(1, 2, 14), }, @@ -152,6 +162,7 @@ func RegisterMetrics() { prometheus.MustRegister(LastSyncTimestamp) prometheus.MustRegister(InitializationLatency) prometheus.MustRegister(SyncerStaleness) + prometheus.MustRegister(EPSStaleness) RegisterSyncerMetrics() }) @@ -187,6 +198,10 @@ func PublishNegSyncerStalenessMetrics(syncerStaleness time.Duration) { SyncerStaleness.Observe(syncerStaleness.Seconds()) } +func PublishNegEPSStalenessMetrics(epsStaleness time.Duration) { + EPSStaleness.Observe(epsStaleness.Seconds()) +} + func getResult(err error) string { if err != nil { return resultError diff --git a/pkg/neg/syncers/transaction.go b/pkg/neg/syncers/transaction.go index cc4614ae3d..15727b2495 100644 --- a/pkg/neg/syncers/transaction.go +++ b/pkg/neg/syncers/transaction.go @@ -245,8 +245,25 @@ func (s *transactionSyncer) syncInternalImpl() error { return nil } endpointSlices := make([]*discovery.EndpointSlice, len(slices)) + negCR, err := getNegFromStore(s.svcNegLister, s.Namespace, s.NegSyncerKey.NegName) for i, slice := range slices { - endpointSlices[i] = slice.(*discovery.EndpointSlice) + endpointslice := slice.(*discovery.EndpointSlice) + endpointSlices[i] = endpointslice + if err != nil { + s.logger.Error(err, "unable to retrieve neg from the store", "neg", klog.KRef(s.Namespace, s.NegName)) + continue + } + lastSyncTimestamp := negCR.Status.LastSyncTime + epsCreationTimestamp := endpointslice.ObjectMeta.CreationTimestamp + + epsStaleness := time.Since(lastSyncTimestamp.Time) + // if this endpoint slice is newly created/created after last sync + if lastSyncTimestamp.Before(&epsCreationTimestamp) { + epsStaleness = time.Since(epsCreationTimestamp.Time) + } + metrics.PublishNegEPSStalenessMetrics(epsStaleness) + s.logger.V(3).Info("Endpoint slice syncs", "Namespace", endpointslice.Namespace, "Name", endpointslice.Name, "staleness", epsStaleness) + } endpointsData := negtypes.EndpointsDataFromEndpointSlices(endpointSlices) targetMap, endpointPodMap, dupCount, err = s.endpointsCalculator.CalculateEndpoints(endpointsData, currentMap)