Skip to content

Commit

Permalink
Fix a flaky failure in VM Agent e2e test "testExternalNode" (antrea-i…
Browse files Browse the repository at this point in the history
…o#6154)

The failure occurs when the check is performed before the default route on the
uplink is removed: two interfaces are then found, which does not match the
expectation.

The fix is to introduce a retry when checking the VM network configuration, and
to verify that only one default route remains before returning.

Signed-off-by: Wenying Dong <wenyingd@vmware.com>
  • Loading branch information
wenyingd committed Apr 3, 2024
1 parent 49a49be commit 316a245
Showing 1 changed file with 72 additions and 36 deletions.
108 changes: 72 additions & 36 deletions test/e2e/vmagent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,15 +236,17 @@ func setupVMAgentTest(t *testing.T, data *TestData) ([]vmInfo, error) {
vms := strings.Split(testOptions.linuxVMs, " ")
for _, vm := range vms {
t.Logf("Get info for Linux VM: %s", vm)
tempVM := getVMInfo(t, data, vm)
tempVM, err := getVMInfo(t, data, vm)
require.NoError(t, err)
vmList = append(vmList, tempVM)
}
}
if testOptions.windowsVMs != "" {
vms := strings.Split(testOptions.windowsVMs, " ")
for _, vm := range vms {
t.Logf("Get info for Windows VM: %s", vm)
tempVM := getWindowsVMInfo(t, data, vm)
tempVM, err := getWindowsVMInfo(t, data, vm)
require.NoError(t, err)
vmList = append(vmList, tempVM)
}
}
Expand All @@ -267,11 +269,13 @@ func teardownVMAgentTest(t *testing.T, data *TestData, vmList []vmInfo) {
verifyUpLinkAfterCleanup := func(vm vmInfo) {
err := wait.PollImmediate(10*time.Second, 1*time.Minute, func() (done bool, err error) {
var tempVM vmInfo
var getVMErr error
if vm.osType == linuxOS {
tempVM = getVMInfo(t, data, vm.nodeName)
tempVM, getVMErr = getVMInfo(t, data, vm.nodeName)
} else {
tempVM = getWindowsVMInfo(t, data, vm.nodeName)
tempVM, getVMErr = getWindowsVMInfo(t, data, vm.nodeName)
}
require.NoError(t, getVMErr)
if vm.ifName != tempVM.ifName {
t.Logf("Retry, unexpected uplink interface name, expected %s, got %s", vm.ifName, tempVM.ifName)
return false, nil
Expand Down Expand Up @@ -330,14 +334,26 @@ func testExternalNode(t *testing.T, data *TestData, vmList []vmInfo) {
exists, err := verifyInterfaceIsInOVS(t, data, vm)
return exists, err
})
assert.NoError(t, err, "Failed to verify host interface in OVS, vmInfo %+v", vm)
require.NoError(t, err, "Failed to verify host interface in OVS, vmInfo %+v", vm)

var tempVM vmInfo
if vm.osType == windowsOS {
tempVM = getWindowsVMInfo(t, data, vm.nodeName)
} else {
tempVM = getVMInfo(t, data, vm.nodeName)
}
err = wait.PollUntilContextTimeout(context.Background(), 2*time.Second, 20*time.Second, true, func(ctx context.Context) (done bool, err error) {
var getVMErr error
if vm.osType == windowsOS {
tempVM, getVMErr = getWindowsVMInfo(t, data, vm.nodeName)
} else {
tempVM, getVMErr = getVMInfo(t, data, vm.nodeName)
}
if getVMErr != nil {
return false, getVMErr
}
vmIFs := strings.Split(tempVM.ifName, "\n")
if len(vmIFs) > 1 {
return false, nil
}
return true, nil
})
require.NoError(t, err)
assert.Equal(t, vm.ifName, tempVM.ifName, "Failed to verify uplink interface")
assert.Equal(t, vm.ip, tempVM.ip, "Failed to verify uplink IP")
}
Expand All @@ -349,50 +365,70 @@ func testExternalNode(t *testing.T, data *TestData, vmList []vmInfo) {
}
}

func getVMInfo(t *testing.T, data *TestData, nodeName string) (info vmInfo) {
var vm vmInfo
vm.nodeName = nodeName
var cmd string
cmd = "ip -o -4 route show to default | awk '{print $5}'"
vm.osType = linuxOS
func getVMInfo(t *testing.T, data *TestData, nodeName string) (vmInfo, error) {
vm := vmInfo{nodeName: nodeName, osType: linuxOS}
cmd := "ip -o -4 route show to default | awk '{print $5}'"
rc, ifName, stderr, err := data.RunCommandOnNode(nodeName, cmd)
require.NoError(t, err, "Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
require.Equal(t, 0, rc, "Failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifName, stderr)

if err != nil {
t.Logf("Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
return vm, err
}
if rc != 0 {
return vm, fmt.Errorf("failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifName, stderr)
}
vm.ifName = strings.TrimSpace(ifName)

cmd = fmt.Sprintf("ifconfig %s | awk '/inet / {print $2}'| sed 's/addr://'", vm.ifName)
rc, ifIP, stderr, err := data.RunCommandOnNode(nodeName, cmd)
require.NoError(t, err, "Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
require.Equal(t, 0, rc, "Failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifIP, stderr)

if err != nil {
t.Logf("Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
return vm, err
}
if rc != 0 {
return vm, fmt.Errorf("failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifIP, stderr)
}
vm.ip = strings.TrimSpace(ifIP)
return vm

return vm, nil
}

func getWindowsVMInfo(t *testing.T, data *TestData, nodeName string) (vm vmInfo) {
func getWindowsVMInfo(t *testing.T, data *TestData, nodeName string) (vmInfo, error) {
var err error
vm.nodeName = nodeName
vm.osType = windowsOS
vm := vmInfo{nodeName: nodeName, osType: windowsOS}
cmd := fmt.Sprintf("powershell 'Get-WmiObject -Class Win32_IP4RouteTable | Where { $_.destination -eq \"0.0.0.0\" -and $_.mask -eq \"0.0.0.0\"} | Sort-Object metric1 | select interfaceindex | ft -HideTableHeaders'")
rc, ifIndex, stderr, err := data.RunCommandOnNode(nodeName, cmd)
require.NoError(t, err, "Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
require.Equal(t, 0, rc, "Failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifIndex, stderr)

if err != nil {
t.Logf("Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
return vm, err
}
if rc != 0 {
return vm, fmt.Errorf("failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifIndex, stderr)
}
vm.ifIndex = strings.TrimSpace(ifIndex)

cmd = fmt.Sprintf("powershell 'Get-NetAdapter -IfIndex %s | select name | ft -HideTableHeaders'", vm.ifIndex)
rc, ifName, stderr, err := data.RunCommandOnNode(nodeName, cmd)
require.NoError(t, err, "Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
require.Equal(t, 0, rc, "Failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifName, stderr)

if err != nil {
t.Logf("Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
return vm, err
}
if rc != 0 {
return vm, fmt.Errorf("failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifName, stderr)
}
vm.ifName = strings.TrimSpace(ifName)

cmd = fmt.Sprintf("powershell 'Get-NetIPAddress -AddressFamily IPv4 -ifIndex %s| select IPAddress| ft -HideTableHeaders'", vm.ifIndex)
rc, ifIP, stderr, err := data.RunCommandOnNode(nodeName, cmd)
require.NoError(t, err, "Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
require.Equal(t, 0, rc, "Failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifIP, stderr)

if err != nil {
t.Logf("Failed to run command <%s> on VM %s, err %v", cmd, nodeName, err)
return vm, err
}
if rc != 0 {
return vm, fmt.Errorf("failed to run command: <%s>, stdout: <%v>, stderr: <%v>", cmd, ifIP, stderr)
}
vm.ip = strings.TrimSpace(ifIP)
return vm

return vm, nil
}

func startAntreaAgent(t *testing.T, data *TestData, vm vmInfo) {
Expand Down

0 comments on commit 316a245

Please sign in to comment.