Skip to content

Commit

Permalink
Add additional error states
Browse files Browse the repository at this point in the history
1. Update NodeNotFound to be an valid error state. This can be triggered
   by endpoints with non-existing nodeName.
2. Add PodNotFound error and add it as an error state. This can be
   triggered with endpoints with non-existing pod.
3. Add loggings for setting error state, and update return error so now
   it correctly sets error state.
  • Loading branch information
sawsa307 committed Mar 16, 2023
1 parent 20d21e9 commit ad344c7
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 11 deletions.
17 changes: 9 additions & 8 deletions pkg/neg/syncers/transaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ func (s *transactionSyncer) syncInternal() error {
start := time.Now()
err := s.syncInternalImpl()
if err != nil {
s.logger.V(3).Info("Setting error state", "err", err, "errorState", s.getErrorStateReason(err))
s.setErrorState(s.getErrorStateReason(err))
}
s.updateStatus(err)
Expand Down Expand Up @@ -261,14 +262,13 @@ func (s *transactionSyncer) syncInternalImpl() error {
endpointsData := negtypes.EndpointsDataFromEndpointSlices(endpointSlices)
targetMap, endpointPodMap, dupCount, err = s.endpointsCalculator.CalculateEndpoints(endpointsData, currentMap)
if err != nil {
s.setErrorState(s.getErrorStateReason(err))
return fmt.Errorf("endpoints calculation error in mode %q, err: %w", s.endpointsCalculator.Mode(), err)
return err
}
err = s.endpointsCalculator.ValidateEndpoints(endpointsData, endpointPodMap, dupCount)
if err != nil {
// TODO(cheungdavid): return error from ValidateEndpoint after degraded mode is implemented
// for now we don't return error so it won't break the sync
s.setErrorState(s.getErrorStateReason(err))
if !s.inErrorState() {
err = s.endpointsCalculator.ValidateEndpoints(endpointsData, endpointPodMap, dupCount)
if err != nil {
return err
}
}
s.logStats(targetMap, "desired NEG endpoints")

Expand Down Expand Up @@ -493,8 +493,9 @@ func (s *transactionSyncer) operationInternal(operation transactionOp, zone stri
endpointBatchErr := s.ValidateEndpointBatch(err, operation)
if endpointBatchErr != nil {
s.syncLock.Lock()
defer s.syncLock.Unlock()
s.logger.V(3).Info("Setting error state", "errorState", s.getErrorStateReason(endpointBatchErr))
s.setErrorState(s.getErrorStateReason(endpointBatchErr))
s.syncLock.Unlock()
}
}

Expand Down
15 changes: 14 additions & 1 deletion pkg/neg/syncers/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ func ensureNetworkEndpointGroup(svcNamespace, svcName, negName, zone, negService
}

// toZoneNetworkEndpointMap translates addresses in endpoints object into zone and endpoints map, and also return the count for duplicated endpoints
func toZoneNetworkEndpointMap(eds []negtypes.EndpointsData, zoneGetter negtypes.ZoneGetter, servicePortName string, networkEndpointType negtypes.NetworkEndpointType) (map[string]negtypes.NetworkEndpointSet, negtypes.EndpointPodMap, int, error) {
func toZoneNetworkEndpointMap(eds []negtypes.EndpointsData, podLister cache.Indexer, zoneGetter negtypes.ZoneGetter, servicePortName string, networkEndpointType negtypes.NetworkEndpointType) (map[string]negtypes.NetworkEndpointSet, negtypes.EndpointPodMap, int, error) {
zoneNetworkEndpointMap := map[string]negtypes.NetworkEndpointSet{}
networkEndpointPodMap := negtypes.EndpointPodMap{}
dupCount := 0
Expand Down Expand Up @@ -252,6 +252,19 @@ func toZoneNetworkEndpointMap(eds []negtypes.EndpointsData, zoneGetter negtypes.
klog.V(2).Infof("Endpoint %q in Endpoints %s/%s does not have an associated pod. Skipping", endpointAddress.Addresses, ed.Meta.Namespace, ed.Meta.Name)
continue
}

key := fmt.Sprintf("%s/%s", endpointAddress.TargetRef.Namespace, endpointAddress.TargetRef.Name)
obj, exists, err := podLister.GetByKey(key)
if err != nil || !exists {
klog.V(2).Infof("Endpoint %q in Endpoints %s/%s does not correspond to an existing pod. Skipping", endpointAddress.Addresses, ed.Meta.Namespace, ed.Meta.Name)
return nil, nil, dupCount, negtypes.ErrPodNotFound
}
_, ok := obj.(*apiv1.Pod)
if !ok {
klog.V(2).Infof("Endpoint %q in Endpoints %s/%s does not correspond to an existing pod. Skipping", endpointAddress.Addresses, ed.Meta.Namespace, ed.Meta.Name)
return nil, nil, dupCount, negtypes.ErrPodNotFound
}

zone, err := zoneGetter.GetZoneForNode(*endpointAddress.NodeName)
if err != nil {
return nil, nil, dupCount, negtypes.ErrNodeNotFound
Expand Down
7 changes: 6 additions & 1 deletion pkg/neg/syncers/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,12 @@ func TestEnsureNetworkEndpointGroup(t *testing.T) {

func TestToZoneNetworkEndpointMapUtil(t *testing.T) {
t.Parallel()

zoneGetter := negtypes.NewFakeZoneGetter()
testEndpointSlices := getDefaultEndpointSlices()
podLister := negtypes.NewTestContext().PodInformer.GetIndexer()
addPodsToLister(podLister, testEndpointSlices)

testCases := []struct {
desc string
portName string
Expand Down Expand Up @@ -530,7 +535,7 @@ func TestToZoneNetworkEndpointMapUtil(t *testing.T) {
}

for _, tc := range testCases {
retSet, retMap, _, err := toZoneNetworkEndpointMap(negtypes.EndpointsDataFromEndpointSlices(getDefaultEndpointSlices()), zoneGetter, tc.portName, tc.networkEndpointType)
retSet, retMap, _, err := toZoneNetworkEndpointMap(negtypes.EndpointsDataFromEndpointSlices(testEndpointSlices), podLister, zoneGetter, tc.portName, tc.networkEndpointType)
if err != nil {
t.Errorf("For case %q, expect nil error, but got %v.", tc.desc, err)
}
Expand Down
6 changes: 5 additions & 1 deletion pkg/neg/types/sync_results.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ const (
ResultEPCountsDiffer = "EPCountsDiffer"
ResultEPMissingNodeName = "EPMissingNodeName"
ResultNodeNotFound = "NodeNotFound"
ResultPodNotFound = "PodNotFound"
ResultEPMissingZone = "EPMissingZone"
ResultEPSEndpointCountZero = "EPSEndpointCountZero"
ResultEPCalculationCountZero = "EPCalculationCountZero"
Expand All @@ -38,7 +39,8 @@ const (
var (
ErrEPCountsDiffer = errors.New("endpoint counts from endpointData and endpointPodMap differ")
ErrEPMissingNodeName = errors.New("endpoint has empty nodeName field")
ErrNodeNotFound = errors.New("failed to retrieve associated zone of node")
ErrPodNotFound = errors.New("failed to retrieve associated pod of the endpoint")
ErrNodeNotFound = errors.New("failed to retrieve associated node of the endpoint")
ErrEPMissingZone = errors.New("endpoint has empty zone field")
ErrEPSEndpointCountZero = errors.New("endpoint count from endpointData cannot be zero")
ErrEPCalculationCountZero = errors.New("endpoint count from endpointPodMap cannot be zero")
Expand All @@ -50,6 +52,8 @@ var (
ErrorStateResult = map[error]string{
ErrEPMissingNodeName: ResultEPMissingNodeName,
ErrEPMissingZone: ResultEPMissingZone,
ErrNodeNotFound: ResultNodeNotFound,
ErrPodNotFound: ResultNodeNotFound,
ErrEPCalculationCountZero: ResultEPCalculationCountZero,
ErrEPSEndpointCountZero: ResultEPSEndpointCountZero,
ErrEPCountsDiffer: ResultEPCountsDiffer,
Expand Down

0 comments on commit ad344c7

Please sign in to comment.