From dded2119eebd023c39a4e02d5fd9f059caef6beb Mon Sep 17 00:00:00 2001 From: Quan Tian Date: Thu, 2 Sep 2021 12:33:10 +0800 Subject: [PATCH] [WireGuard] Fix service traffic requiring SNAT (#2697) The service traffic requiring SNAT couldn't be transferred to peer Node when the endpoint Pod is on another Node. This was because we didn't set any address on WireGuard device antrea-wg0. Therefore, when iptables MASQUERADE action took effect, it chose one IP from other interfaces, which might not be the gateway address on antrea-gw0. This caused two problems: 1. Peer wireguard didn't accept the packet as its source address was not in its "allowed ips" 2. Peer Node wouldn't route the response back via the encrypted tunnel as the destination IP was not in its "allowed ips" This patch fixes it by assigning the gateway IPs on the WireGuard device. But it uses "/32" mask for IPv4 address and "/128" mask for IPv6 address to avoid impacting routes on Antrea gateway. Signed-off-by: Quan Tian --- pkg/agent/agent.go | 17 ++++++++-------- pkg/agent/wireguard/client_linux.go | 24 ++++++++++++++++++++++ pkg/agent/wireguard/client_windows.go | 1 + test/e2e/wireguard_test.go | 29 +++++++++++++++++++++++++-- 4 files changed, 61 insertions(+), 10 deletions(-) diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index b766bb45115..e237a56910b 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -269,6 +269,15 @@ func (i *Initializer) Initialize() error { return err } + if err := i.prepareHostNetwork(); err != nil { + return err + } + + if err := i.setupOVSBridge(); err != nil { + return err + } + + // initializeWireGuard must be executed after setupOVSBridge as it requires gateway addresses on the OVS bridge. switch i.networkConfig.TrafficEncryptionMode { case config.TrafficEncryptionModeIPSec: if err := i.initializeIPSec(); err != nil { @@ -280,14 +289,6 @@ func (i *Initializer) Initialize() error { } } - if err := i.prepareHostNetwork(); err != nil { - return err - } - - if err := i.setupOVSBridge(); err != nil { - return err - } - wg.Add(1) // routeClient.Initialize() should be after i.setupOVSBridge() which // creates the host gateway interface. diff --git a/pkg/agent/wireguard/client_linux.go b/pkg/agent/wireguard/client_linux.go index 1aea2d869b3..b55ec9a74db 100644 --- a/pkg/agent/wireguard/client_linux.go +++ b/pkg/agent/wireguard/client_linux.go @@ -38,6 +38,7 @@ import ( "antrea.io/antrea/pkg/agent/config" "antrea.io/antrea/pkg/agent/types" + "antrea.io/antrea/pkg/agent/util" ) const defaultWireGuardInterfaceName = "antrea-wg0" @@ -61,6 +62,7 @@ type client struct { privateKey wgtypes.Key peerPublicKeyByNodeName *sync.Map wireGuardConfig *config.WireGuardConfig + gatewayConfig *config.GatewayConfig } func New(clientSet clientset.Interface, nodeConfig *config.NodeConfig, wireGuardConfig *config.WireGuardConfig) (Interface, error) { @@ -77,6 +79,7 @@ func New(clientSet clientset.Interface, nodeConfig *config.NodeConfig, wireGuard k8sClient: clientSet, wireGuardConfig: wireGuardConfig, peerPublicKeyByNodeName: &sync.Map{}, + gatewayConfig: nodeConfig.GatewayConfig, } return c, nil } @@ -94,6 +97,27 @@ func (client *client) Init() error { if err := netlink.LinkSetUp(link); err != nil { return err } + // Configure the IP addresses same as Antrea gateway so iptables MASQUERADE target will select it as source address. + // It's necessary to make Service traffic requiring SNAT (e.g. host to ClusterIP, external to NodePort) accepted by + // peer Node and to make their response routed back correctly. + // It uses "/32" mask for IPv4 address and "/128" mask for IPv6 address to avoid impacting routes on Antrea gateway. + var gatewayIPs []*net.IPNet + if client.gatewayConfig.IPv4 != nil { + gatewayIPs = append(gatewayIPs, &net.IPNet{ + IP: client.gatewayConfig.IPv4, + Mask: net.CIDRMask(32, 32), + }) + } + if client.gatewayConfig.IPv6 != nil { + gatewayIPs = append(gatewayIPs, &net.IPNet{ + IP: client.gatewayConfig.IPv6, + Mask: net.CIDRMask(128, 128), + }) + } + // This must be executed after netlink.LinkSetUp as the latter ensures link.Attrs().Index is set. + if err := util.ConfigureLinkAddresses(link.Attrs().Index, gatewayIPs); err != nil { + return err + } client.wireGuardConfig.LinkIndex = link.Attrs().Index wgDev, err := client.wgClient.Device(client.wireGuardConfig.Name) if err != nil { diff --git a/pkg/agent/wireguard/client_windows.go b/pkg/agent/wireguard/client_windows.go index 43c3c49a36a..8292a861aff 100644 --- a/pkg/agent/wireguard/client_windows.go +++ b/pkg/agent/wireguard/client_windows.go @@ -1,3 +1,4 @@ +//go:build windows // +build windows // Copyright 2021 Antrea Authors diff --git a/test/e2e/wireguard_test.go b/test/e2e/wireguard_test.go index 205ba4586bc..bfdc4baf062 100644 --- a/test/e2e/wireguard_test.go +++ b/test/e2e/wireguard_test.go @@ -16,11 +16,14 @@ package e2e import ( "fmt" + "net" + "strconv" "strings" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" "antrea.io/antrea/pkg/agent/config" "antrea.io/antrea/pkg/apis" @@ -66,7 +69,8 @@ func TestWireGuard(t *testing.T) { defer data.redeployAntrea(t, deployAntreaDefault) } - t.Run("testWireGuardTunnelConnectivity", func(t *testing.T) { testWireGuardTunnelConnectivity(t, data) }) + t.Run("testPodConnectivity", func(t *testing.T) { testPodConnectivity(t, data) }) + t.Run("testServiceConnectivity", func(t *testing.T) { testServiceConnectivity(t, data) }) } func (data *TestData) getWireGuardPeerEndpointsWithHandshake(nodeName string) ([]string, error) { @@ -101,7 +105,7 @@ func (data *TestData) getWireGuardPeerEndpointsWithHandshake(nodeName string) ([ return peerEndpoints, nil } -func testWireGuardTunnelConnectivity(t *testing.T, data *TestData) { +func testPodConnectivity(t *testing.T, data *TestData) { podInfos, deletePods := createPodsOnDifferentNodes(t, data, "differentnodes") defer deletePods() numPods := 2 @@ -123,3 +127,24 @@ func testWireGuardTunnelConnectivity(t *testing.T, data *TestData) { assert.Contains(t, endpoints, fmt.Sprintf("%s:%d", nodeIP, apis.WireGuardListenPort)) } } + +// testServiceConnectivity verifies host-to-service can be transferred through the encrypted tunnel correctly. +func testServiceConnectivity(t *testing.T, data *TestData) { + clientPodName := "hostnetwork-pod" + svcName := "agnhost" + clientPodNode := nodeName(0) + serverPodNode := nodeName(1) + svc, cleanup := data.createAgnhostServiceAndBackendPods(t, svcName, serverPodNode, corev1.ServiceTypeNodePort) + defer cleanup() + + // Create the a hostNetwork Pod on a Node different from the service's backend Pod, so the service traffic will be transferred across the tunnel. + require.NoError(t, data.createPodOnNode(clientPodName, testNamespace, clientPodNode, busyboxImage, []string{"sleep", strconv.Itoa(3600)}, nil, nil, nil, true, nil)) + defer data.deletePodAndWait(defaultTimeout, clientPodName, testNamespace) + require.NoError(t, data.podWaitForRunning(defaultTimeout, clientPodName, testNamespace)) + + err := data.runNetcatCommandFromTestPod(clientPodName, testNamespace, svc.Spec.ClusterIP, 80) + require.NoError(t, err, "Pod %s should be able to connect the service's ClusterIP %s, but was not able to connect", clientPodName, net.JoinHostPort(svc.Spec.ClusterIP, fmt.Sprint(80))) + + err = data.runNetcatCommandFromTestPod(clientPodName, testNamespace, "127.0.0.1", svc.Spec.Ports[0].NodePort) + require.NoError(t, err, "Pod %s should be able to connect the service's NodePort 127.0.0.1:%s, but was not able to connect", clientPodName, svc.Spec.Ports[0].NodePort) +}