From d1c8a8e1885415aed725839c91e4af5a3cdd26f7 Mon Sep 17 00:00:00 2001 From: Quan Tian Date: Thu, 3 Aug 2023 19:14:13 +0800 Subject: [PATCH] Fix error log when agent fails to connect to K8s API When agent fails to connect to K8s API, the previous error logs about empty PodCIDR has no basis, and it shouldn't suggest users to check kube-controller-manager configuration. This patch fixes the error logs by differentiating ErrWaitTimeout from other errors. Only the former means the PodCIDR is empty. Signed-off-by: Quan Tian --- pkg/agent/agent.go | 15 ++++++----- pkg/agent/agent_test.go | 60 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 63 insertions(+), 12 deletions(-) diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index 62b51ec7152..f0799fe43d5 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -96,6 +96,8 @@ var ( // Declared as variables for testing. defaultFs = afero.NewOsFs() clock clockutils.WithTicker = &clockutils.RealClock{} + + getNodeTimeout = 30 * time.Second ) // Initializer knows how to setup host networking, OpenVSwitch, and Openflow. @@ -905,7 +907,7 @@ func (i *Initializer) setTunnelCsum(tunnelPortName string, enable bool) error { // host gateway interface. func (i *Initializer) initK8sNodeLocalConfig(nodeName string) error { var node *v1.Node - if err := wait.PollImmediate(5*time.Second, 30*time.Second, func() (bool, error) { + if err := wait.PollImmediate(5*time.Second, getNodeTimeout, func() (bool, error) { var err error node, err = i.client.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) if err != nil { @@ -916,18 +918,19 @@ func (i *Initializer) initK8sNodeLocalConfig(nodeName string) error { if !i.networkConfig.TrafficEncapMode.IsNetworkPolicyOnly() { // Validate that PodCIDR has been configured. if node.Spec.PodCIDRs == nil && node.Spec.PodCIDR == "" { - klog.V(2).Info("Waiting for Node PodCIDR configuration to complete") + klog.InfoS("Waiting for Node PodCIDR configuration to complete", "nodeName", nodeName) return false, nil } } return true, nil }); err != nil { - if node != nil && node.Spec.PodCIDRs == nil && node.Spec.PodCIDR == "" { + if err == wait.ErrWaitTimeout { klog.ErrorS(err, "Spec.PodCIDR is empty for Node. Please make sure --allocate-node-cidrs is enabled "+ - "for kube-controller-manager and --cluster-cidr specifies a sufficient CIDR range", "nodeName", nodeName) - return fmt.Errorf("CIDR string is empty for Node %s", nodeName) + "for kube-controller-manager and --cluster-cidr specifies a sufficient CIDR range, or nodeIPAM is "+ + "enabled for antrea-controller", "nodeName", nodeName) + return fmt.Errorf("Spec.PodCIDR is empty for Node %s", nodeName) } - return fmt.Errorf("node retrieval failed with the following error: %v", err) + return err } // nodeInterface is the interface that has K8s Node IP. transportInterface is the interface that is used for diff --git a/pkg/agent/agent_test.go b/pkg/agent/agent_test.go index 625731553a2..e8d8e228d32 100644 --- a/pkg/agent/agent_test.go +++ b/pkg/agent/agent_test.go @@ -30,8 +30,10 @@ import ( "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + k8sruntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" clockutils "k8s.io/utils/clock" clocktesting "k8s.io/utils/clock/testing" @@ -210,12 +212,15 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { transportAddresses := strings.Join([]string{testTransportIface.ipV4Net.IP.String(), testTransportIface.ipV6Net.IP.String()}, ",") tests := []struct { name string + getNodeReaction k8stesting.ReactionFunc trafficEncapMode config.TrafficEncapModeType transportIfName string transportIfCIDRs []string transportInterface *testTransInterface tunnelType ovsconfig.TunnelType mtu int + podCIDR string + expectedErr string expectedMTU int expectedNodeLocalIfaceMTU int expectedNodeAnnotation map[string]string @@ -224,6 +229,7 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { name: "noencap mode", trafficEncapMode: config.TrafficEncapModeNoEncap, mtu: 0, + podCIDR: podCIDRStr, expectedNodeLocalIfaceMTU: 1500, expectedMTU: 1500, expectedNodeAnnotation: map[string]string{types.NodeMACAddressAnnotationKey: macAddr.String()}, @@ -232,6 +238,7 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { name: "hybrid mode", trafficEncapMode: config.TrafficEncapModeHybrid, mtu: 0, + podCIDR: podCIDRStr, expectedNodeLocalIfaceMTU: 1500, expectedMTU: 1500, expectedNodeAnnotation: map[string]string{types.NodeMACAddressAnnotationKey: macAddr.String()}, @@ -241,6 +248,7 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { trafficEncapMode: config.TrafficEncapModeEncap, tunnelType: ovsconfig.GeneveTunnel, mtu: 0, + podCIDR: podCIDRStr, expectedNodeLocalIfaceMTU: 1500, expectedMTU: 1450, expectedNodeAnnotation: nil, @@ -250,6 +258,7 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { trafficEncapMode: config.TrafficEncapModeEncap, tunnelType: ovsconfig.GeneveTunnel, mtu: 1400, + podCIDR: podCIDRStr, expectedNodeLocalIfaceMTU: 1500, expectedMTU: 1400, expectedNodeAnnotation: nil, @@ -260,6 +269,7 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { transportIfName: testTransportIface.iface.Name, transportInterface: testTransportIface, mtu: 0, + podCIDR: podCIDRStr, expectedNodeLocalIfaceMTU: 1500, expectedMTU: 1500, expectedNodeAnnotation: map[string]string{ @@ -273,6 +283,7 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { transportIfName: testTransportIface.iface.Name, transportInterface: testTransportIface, mtu: 0, + podCIDR: podCIDRStr, expectedNodeLocalIfaceMTU: 1500, expectedMTU: 1500, expectedNodeAnnotation: map[string]string{ @@ -287,6 +298,7 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { transportInterface: testTransportIface, tunnelType: ovsconfig.GeneveTunnel, mtu: 0, + podCIDR: podCIDRStr, expectedNodeLocalIfaceMTU: 1500, expectedMTU: 1450, expectedNodeAnnotation: map[string]string{ @@ -300,6 +312,7 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { transportInterface: testTransportIface, tunnelType: ovsconfig.GeneveTunnel, mtu: 1400, + podCIDR: podCIDRStr, expectedNodeLocalIfaceMTU: 1500, expectedMTU: 1400, expectedNodeAnnotation: map[string]string{ @@ -312,6 +325,7 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { transportIfCIDRs: transportCIDRs, transportInterface: testTransportIface, mtu: 0, + podCIDR: podCIDRStr, expectedNodeLocalIfaceMTU: 1500, expectedMTU: 1500, expectedNodeAnnotation: map[string]string{ @@ -325,6 +339,7 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { transportIfCIDRs: transportCIDRs, transportInterface: testTransportIface, mtu: 0, + podCIDR: podCIDRStr, expectedNodeLocalIfaceMTU: 1500, expectedMTU: 1500, expectedNodeAnnotation: map[string]string{ @@ -339,6 +354,7 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { transportInterface: testTransportIface, tunnelType: ovsconfig.GeneveTunnel, mtu: 0, + podCIDR: podCIDRStr, expectedNodeLocalIfaceMTU: 1500, expectedMTU: 1450, expectedNodeAnnotation: map[string]string{ @@ -352,12 +368,28 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { transportInterface: testTransportIface, tunnelType: ovsconfig.GeneveTunnel, mtu: 1400, + podCIDR: podCIDRStr, expectedNodeLocalIfaceMTU: 1500, expectedMTU: 1400, expectedNodeAnnotation: map[string]string{ types.NodeTransportAddressAnnotationKey: transportAddresses, }, }, + { + name: "error getting Node", + getNodeReaction: func(action k8stesting.Action) (handled bool, ret k8sruntime.Object, err error) { + return true, nil, fmt.Errorf("connection error") + }, + trafficEncapMode: config.TrafficEncapModeEncap, + tunnelType: ovsconfig.GeneveTunnel, + expectedErr: "failed to get Node with name node1 from K8s: connection error", + }, + { + name: "empty node podCIDR", + trafficEncapMode: config.TrafficEncapModeEncap, + tunnelType: ovsconfig.GeneveTunnel, + expectedErr: "Spec.PodCIDR is empty for Node node1", + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -366,7 +398,7 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { Name: nodeName, }, Spec: corev1.NodeSpec{ - PodCIDR: podCIDRStr, + PodCIDR: tt.podCIDR, }, Status: corev1.NodeStatus{ Addresses: []corev1.NodeAddress{ @@ -378,6 +410,10 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { }, } client := fake.NewSimpleClientset(node) + if tt.getNodeReaction != nil { + client.PrependReactor("get", "nodes", tt.getNodeReaction) + } + ifaceStore := interfacestore.NewInterfaceStore() expectedNodeConfig := config.NodeConfig{ Name: nodeName, @@ -417,12 +453,18 @@ func TestInitK8sNodeLocalConfig(t *testing.T) { } defer mockGetIPNetDeviceFromIP(nodeIPNet, ipDevice)() defer mockNodeNameEnv(nodeName)() + defer mockGetNodeTimeout(100 * time.Millisecond) - require.NoError(t, initializer.initK8sNodeLocalConfig(nodeName)) - assert.Equal(t, expectedNodeConfig, *initializer.nodeConfig) - node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) - require.NoError(t, err) - assert.Equal(t, tt.expectedNodeAnnotation, node.Annotations) + err := initializer.initK8sNodeLocalConfig(nodeName) + if tt.expectedErr == "" { + require.NoError(t, err) + assert.Equal(t, expectedNodeConfig, *initializer.nodeConfig) + node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeName, metav1.GetOptions{}) + require.NoError(t, err) + assert.Equal(t, tt.expectedNodeAnnotation, node.Annotations) + } else { + assert.ErrorContains(t, err, tt.expectedErr) + } }) } } @@ -440,6 +482,12 @@ func mockNodeNameEnv(name string) func() { return func() { os.Unsetenv(env.NodeNameEnvKey) } } +func mockGetNodeTimeout(timeout time.Duration) func() { + prevTimeout := getNodeTimeout + getNodeTimeout = timeout + return func() { getNodeTimeout = prevTimeout } +} + func mockGetTransportIPNetDeviceByName(ipV4Net, ipV6Net *net.IPNet, ipDevice *net.Interface) func() { prevGetIPNetDeviceByName := getTransportIPNetDeviceByName getTransportIPNetDeviceByName = func(ifName, brName string) (*net.IPNet, *net.IPNet, *net.Interface, error) {