Skip to content

Commit

Permalink
add retry logic for LinkList and AddrList netlink calls in cni
Browse files Browse the repository at this point in the history
  • Loading branch information
plamen-bardarov committed Oct 30, 2024
1 parent 8d5f72f commit 2f28032
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 4 deletions.
43 changes: 43 additions & 0 deletions src/code.cloudfoundry.org/lib/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,53 @@ package common

import (
	"errors"
	"fmt"
	"math"
	"syscall"
	"time"

	"code.cloudfoundry.org/lager/v3/lagerflags"
)

// RetryableFunc is an operation that produces a value of type T or an error.
// It is the callback type accepted by RetryWithBackoff.
type RetryableFunc[T any] func() (T, error)

// GetLagerConfig returns the default lager configuration with its time
// format switched to RFC3339.
func GetLagerConfig() lagerflags.LagerConfig {
	cfg := lagerflags.DefaultLagerConfig()
	cfg.TimeFormat = lagerflags.FormatRFC3339

	return cfg
}

// RetryWithBackoff calls fn until it succeeds, fails with a non-retryable
// error, or has been attempted maxRetries times, sleeping with exponential
// backoff between consecutive attempts.
//
// interval is the initial delay between attempts in milliseconds; the delay
// doubles after every failed attempt (interval, 2*interval, 4*interval, ...).
// maxRetries is the total number of attempts, including the first one.
// T is the type of the value produced by fn.
//
// On success the value returned by fn is returned with a nil error. A
// non-retryable error is returned immediately and unmodified. When every
// attempt fails with a retryable error, the last error is returned wrapped, so
// callers can still inspect it with errors.Is / errors.As. When maxRetries is
// not positive, fn is never called and an error is returned.
func RetryWithBackoff[T any](interval int, maxRetries int, fn RetryableFunc[T]) (T, error) {
	var result T
	var err error

	if maxRetries <= 0 {
		// No attempt is made, so there is no underlying error to wrap with %w.
		return result, fmt.Errorf("failed after %d maxRetries: no attempts were made", maxRetries)
	}

	retryInterval := time.Duration(interval) * time.Millisecond

	for retry := 0; retry < maxRetries; retry++ {
		result, err = fn()
		if err == nil {
			return result, nil
		}

		// Anything that is not a transient failure is reported right away.
		if !isRetryableError(err) {
			return result, err
		}

		// Back off only when another attempt follows; sleeping after the final
		// attempt would just delay reporting the failure.
		if retry < maxRetries-1 {
			time.Sleep(time.Duration(math.Pow(2, float64(retry))) * retryInterval)
		}
	}

	// Retries exhausted: surface the last error.
	return result, fmt.Errorf("failed after %d maxRetries: %w", maxRetries, err)
}

// isRetryableError reports whether err is, or wraps, a syscall.Errno that the
// errno itself classifies as temporary (Errno.Temporary). Any other error,
// including nil, is treated as non-retryable.
func isRetryableError(err error) bool {
	var errno syscall.Errno
	// errors.As walks the wrap chain, so an errno decorated with extra context
	// (e.g. via fmt.Errorf("...: %w", errno)) is still recognised.
	if errors.As(err, &errno) {
		return errno.Temporary()
	}
	return false
}
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
package interfacelookup

import (
"code.cloudfoundry.org/lib/common"
"fmt"
"net"

"github.com/vishvananda/netlink"
)

const (
// retryInterval is the initial backoff interval passed to
// common.RetryWithBackoff, which documents the value as milliseconds.
retryInterval = 50
// maxRetries is the retry limit passed to common.RetryWithBackoff for each
// netlink call made by this package.
maxRetries = 4
)

//go:generate counterfeiter -o ../fakes/netlinkadapter.go --fake-name NetlinkAdapter . netlinkAdapter
type netlinkAdapter interface {
LinkList() ([]netlink.Link, error)
Expand All @@ -18,13 +24,19 @@ type InterfaceNameLookup struct {
}

func (i InterfaceNameLookup) GetNameFromIP(ip string) (string, error) {
links, err := i.NetlinkAdapter.LinkList()
links, err := common.RetryWithBackoff(retryInterval, maxRetries, func() ([]netlink.Link, error) {
return i.NetlinkAdapter.LinkList()
})

if err != nil {
return "", fmt.Errorf("discover interface names: %s", err)
}

for _, link := range links {
addresses, err := i.NetlinkAdapter.AddrList(link, netlink.FAMILY_V4)
addresses, err := common.RetryWithBackoff(retryInterval, maxRetries, func() ([]netlink.Addr, error) {
return i.NetlinkAdapter.AddrList(link, netlink.FAMILY_V4)
})

if err != nil {
return "", fmt.Errorf("failed to get underlay interface name by link for %s: %s", link.Attrs().Name, err)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package interfacelookup_test

import (
"errors"

"code.cloudfoundry.org/cni-wrapper-plugin/fakes"
"errors"
"golang.org/x/sys/unix"

"net"

Expand Down Expand Up @@ -98,6 +98,24 @@ var _ = Describe("InterfaceNameLookup", func() {
_, err := interfaceNameLookup.GetNameFromIP("10.0.0.0")
Expect(err).To(MatchError("discover interface names: sad meow"))
})

Context("and the error is dump interrupted error the first 3 tries", func() {
BeforeEach(func() {
netlinkAdapter.LinkListReturnsOnCall(0, nil, unix.EINTR)
netlinkAdapter.LinkListReturnsOnCall(1, nil, unix.EINTR)
netlinkAdapter.LinkListReturnsOnCall(2, nil, unix.EINTR)
netlinkAdapter.LinkListReturnsOnCall(3, []netlink.Link{
netlinkLinkEth0,
netlinkLinkEth1,
}, nil)
})

It("succeeds after 4 retries", func() {
_, err := interfaceNameLookup.GetNameFromIP("10.0.0.0")
Expect(err).NotTo(HaveOccurred())
Expect(netlinkAdapter.LinkListCallCount()).To(Equal(4))
})
})
})

Context("when it fails to fetch the AddrList", func() {
Expand Down

0 comments on commit 2f28032

Please sign in to comment.