-
Notifications
You must be signed in to change notification settings - Fork 386
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix Pod-to-external traffic on EKS in policyOnly mode #3975
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -365,21 +365,25 @@ func getLocalAntreaFlexibleIPAMPodIPSetName(isIPv6 bool) string { | |
} | ||
} | ||
|
||
// writeEKSMangleRule writes an additional iptables mangle rule to the | ||
// iptablesData buffer, which is required to ensure that the reverse path for | ||
// NodePort Service traffic is correct on EKS. | ||
// See https://github.com/antrea-io/antrea/issues/678. | ||
func (c *Client) writeEKSMangleRule(iptablesData *bytes.Buffer) { | ||
// writeEKSMangleRules writes an additional iptables mangle rule to the | ||
// iptablesData buffer, to set the traffic mark to the connection mark for | ||
// traffic coming out of the gateway. This rule is needed for 2 cases: | ||
// * for the reverse path for NodePort Service traffic (see | ||
// https://github.com/antrea-io/antrea/issues/678). | ||
// * for Pod-to-external traffic that needs to be SNATed (see | ||
// https://github.com/antrea-io/antrea/issues/3946). | ||
func (c *Client) writeEKSMangleRules(iptablesData *bytes.Buffer) { | ||
// TODO: the following should be taking into account: | ||
// 1) AWS_VPC_CNI_NODE_PORT_SUPPORT may be set to false (by default is | ||
// true), in which case we do not need to install the rule. | ||
// 1) this rule is only needed if AWS_VPC_CNI_NODE_PORT_SUPPORT is set | ||
// to true (which is the default) or if AWS_VPC_K8S_CNI_EXTERNALSNAT | ||
// is set to false (which is also the default). If both settings are | ||
// changed, we do not need to install the rule. | ||
// 2) this option is not documented but the mark value can be | ||
// configured with AWS_VPC_K8S_CNI_CONNMARK. | ||
// We could look for the rule added by AWS VPC CNI to the mangle | ||
// table. If it does not exist, we do not need to install this rule. If | ||
// it does exist we can scan for the mark value and use that in our | ||
// rule. | ||
klog.V(2).Infof("Add iptable mangle rule for EKS to ensure correct reverse path for NodePort Service traffic") | ||
// While we do not have access to these environment variables, we could | ||
// look for existing rules installed by the AWS VPC CNI, and determine | ||
// whether we need to install this rule. | ||
klog.V(2).InfoS("Add iptables mangle rules for EKS") | ||
writeLine(iptablesData, []string{ | ||
"-A", antreaMangleChain, | ||
"-m", "comment", "--comment", `"Antrea: AWS, primary ENI"`, | ||
|
@@ -388,6 +392,39 @@ func (c *Client) writeEKSMangleRule(iptablesData *bytes.Buffer) { | |
}...) | ||
} | ||
|
||
// writeEKSNATRules writes additional iptables nat rules to the iptablesData | ||
// buffer. The first rule sets the connection mark for Pod-to-external traffic | ||
// that needs to be SNATed. Without the mark, this traffic is not routed using | ||
// the correct route table for Pods getting an IP address from a secondary | ||
// network interface (ENI). The second rule restores the packet mark from the | ||
// connection mark. That rule will only apply to the first packet of the | ||
// connection. For subsequent packets, the rule installed buy writeEKSMangleRule | ||
// will take care of restoring the mark. We need that rule because the mangle | ||
// table is traversed before the nat table. | ||
// See https://docs.aws.amazon.com/eks/latest/userguide/external-snat.html and | ||
// https://github.com/antrea-io/antrea/issues/3946 for more details. | ||
func (c *Client) writeEKSNATRules(iptablesData *bytes.Buffer) { | ||
// TODO: just like for writeEKSMangleRule, these rules should ideally be | ||
// installed conditionally, when AWS_VPC_K8S_CNI_EXTERNALSNAT is set to | ||
// false (which is the default value). | ||
klog.V(2).InfoS("Add iptables nat rules for EKS") | ||
writeLine(iptablesData, []string{ | ||
"-A", antreaPreRoutingChain, | ||
"-i", c.nodeConfig.GatewayConfig.Name, | ||
"-m", "comment", "--comment", `"Antrea: AWS, outbound connections"`, | ||
"-j", "AWS-CONNMARK-CHAIN-0", | ||
}...) | ||
// The AWS VPC CNI already installs the same rule in the PREROUTING | ||
// chain. However, that rule will typically be visited before the | ||
// ANTREA-PREROUTING chain, hence we need to install our own copy of the | ||
// rule. | ||
writeLine(iptablesData, []string{ | ||
"-A", antreaPreRoutingChain, | ||
"-m", "comment", "--comment", `"Antrea: AWS, CONNMARK (first packet)"`, | ||
"-j", "CONNMARK", "--restore-mark", "--nfmask", "0x80", "--ctmask", "0x80", | ||
}...) | ||
} | ||
|
||
// syncIPTables ensure that the iptables infrastructure we use is set up. | ||
// It's idempotent and can safely be called on every startup. | ||
func (c *Client) syncIPTables() error { | ||
|
@@ -400,21 +437,25 @@ func (c *Client) syncIPTables() error { | |
// Create the antrea managed chains and link them to built-in chains. | ||
// We cannot use iptables-restore for these jump rules because there | ||
// are non antrea managed rules in built-in chains. | ||
jumpRules := []struct{ table, srcChain, dstChain, comment string }{ | ||
type jumpRule struct { | ||
table string | ||
srcChain string | ||
dstChain string | ||
comment string | ||
} | ||
jumpRules := []jumpRule{ | ||
{iptables.RawTable, iptables.PreRoutingChain, antreaPreRoutingChain, "Antrea: jump to Antrea prerouting rules"}, | ||
{iptables.RawTable, iptables.OutputChain, antreaOutputChain, "Antrea: jump to Antrea output rules"}, | ||
{iptables.FilterTable, iptables.ForwardChain, antreaForwardChain, "Antrea: jump to Antrea forwarding rules"}, | ||
{iptables.NATTable, iptables.PostRoutingChain, antreaPostRoutingChain, "Antrea: jump to Antrea postrouting rules"}, | ||
{iptables.MangleTable, iptables.PreRoutingChain, antreaMangleChain, "Antrea: jump to Antrea mangle rules"}, // TODO: unify the chain naming style | ||
{iptables.MangleTable, iptables.OutputChain, antreaOutputChain, "Antrea: jump to Antrea output rules"}, | ||
} | ||
if c.proxyAll || env.IsCloudEKS() { | ||
jumpRules = append(jumpRules, jumpRule{iptables.NATTable, iptables.PreRoutingChain, antreaPreRoutingChain, "Antrea: jump to Antrea prerouting rules"}) | ||
} | ||
if c.proxyAll { | ||
jumpRules = append(jumpRules, | ||
[]struct{ table, srcChain, dstChain, comment string }{ | ||
{iptables.NATTable, iptables.PreRoutingChain, antreaPreRoutingChain, "Antrea: jump to Antrea prerouting rules"}, | ||
{iptables.NATTable, iptables.OutputChain, antreaOutputChain, "Antrea: jump to Antrea output rules"}, | ||
}..., | ||
) | ||
jumpRules = append(jumpRules, jumpRule{iptables.NATTable, iptables.OutputChain, antreaOutputChain, "Antrea: jump to Antrea output rules"}) | ||
} | ||
for _, rule := range jumpRules { | ||
if err := c.ipt.EnsureChain(iptables.ProtocolDual, rule.table, rule.dstChain); err != nil { | ||
|
@@ -438,6 +479,7 @@ func (c *Client) syncIPTables() error { | |
} | ||
return true | ||
}) | ||
|
||
// Use iptables-restore to configure IPv4 settings. | ||
if c.networkConfig.IPv4Enabled { | ||
iptablesData := c.restoreIptablesData(c.nodeConfig.PodIPv4CIDR, | ||
|
@@ -446,7 +488,9 @@ func (c *Client) syncIPTables() error { | |
antreaNodePortIPSet, | ||
config.VirtualNodePortDNATIPv4, | ||
config.VirtualServiceIPv4, | ||
snatMarkToIPv4) | ||
snatMarkToIPv4, | ||
false) | ||
|
||
// Setting --noflush to keep the previous contents (i.e. non antrea managed chains) of the tables. | ||
if err := c.ipt.Restore(iptablesData.Bytes(), false, false); err != nil { | ||
return err | ||
|
@@ -461,12 +505,14 @@ func (c *Client) syncIPTables() error { | |
antreaNodePortIP6Set, | ||
config.VirtualNodePortDNATIPv6, | ||
config.VirtualServiceIPv6, | ||
snatMarkToIPv6) | ||
snatMarkToIPv6, | ||
true) | ||
// Setting --noflush to keep the previous contents (i.e. non antrea managed chains) of the tables. | ||
if err := c.ipt.Restore(iptablesData.Bytes(), false, true); err != nil { | ||
return err | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
|
@@ -476,7 +522,8 @@ func (c *Client) restoreIptablesData(podCIDR *net.IPNet, | |
nodePortIPSet string, | ||
nodePortDNATVirtualIP, | ||
serviceVirtualIP net.IP, | ||
snatMarkToIP map[uint32]net.IP) *bytes.Buffer { | ||
snatMarkToIP map[uint32]net.IP, | ||
isIPv6 bool) *bytes.Buffer { | ||
// Create required rules in the antrea chains. | ||
// Use iptables-restore as it flushes the involved chains and creates the desired rules | ||
// with a single call, instead of string matching to clean up stale rules. | ||
|
@@ -520,10 +567,11 @@ func (c *Client) restoreIptablesData(podCIDR *net.IPNet, | |
writeLine(iptablesData, iptables.MakeChainLine(antreaMangleChain)) | ||
writeLine(iptablesData, iptables.MakeChainLine(antreaOutputChain)) | ||
|
||
// When Antrea is used to enforce NetworkPolicies in EKS, an additional iptables | ||
// mangle rule is required. See https://github.com/antrea-io/antrea/issues/678. | ||
if env.IsCloudEKS() { | ||
c.writeEKSMangleRule(iptablesData) | ||
// When Antrea is used to enforce NetworkPolicies in EKS, additional iptables | ||
// mangle rules are required. See https://github.com/antrea-io/antrea/issues/678. | ||
// These rules are only needed for IPv4. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does it have NodePort IPv6 case? If yes, is the rule still needed for IPv6, otherwise reverse path may be different? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, I think you're right. This one should always be installed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually scratch that. The AWS rules are only installed for IPv4: https://github.com/aws/amazon-vpc-cni-k8s/blob/d43309bdfdb5034df86907944e682d78608ba165/pkg/networkutils/network.go#L402-L418. Not entirely sure why, but I would think that routing is configured differently for IPv6, and these rules are not needed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is this comment:
So that explains why there is no need for the rule: only the primary ENI is used, with no secondary route table. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. got it, thanks |
||
if env.IsCloudEKS() && !isIPv6 { | ||
c.writeEKSMangleRules(iptablesData) | ||
} | ||
|
||
// To make liveness/readiness probe traffic bypass ingress rules of Network Policies, mark locally generated packets | ||
|
@@ -582,8 +630,10 @@ func (c *Client) restoreIptablesData(podCIDR *net.IPNet, | |
writeLine(iptablesData, "COMMIT") | ||
|
||
writeLine(iptablesData, "*nat") | ||
if c.proxyAll { | ||
if c.proxyAll || env.IsCloudEKS() { | ||
writeLine(iptablesData, iptables.MakeChainLine(antreaPreRoutingChain)) | ||
} | ||
if c.proxyAll { | ||
writeLine(iptablesData, []string{ | ||
"-A", antreaPreRoutingChain, | ||
"-m", "comment", "--comment", `"Antrea: DNAT external to NodePort packets"`, | ||
|
@@ -684,6 +734,13 @@ func (c *Client) restoreIptablesData(podCIDR *net.IPNet, | |
}...) | ||
} | ||
|
||
// When Antrea is used to enforce NetworkPolicies in EKS, additional iptables | ||
// nat rules are required. See https://github.com/antrea-io/antrea/issues/3946. | ||
// These rules are only needed for IPv4. | ||
if env.IsCloudEKS() && !isIPv6 { | ||
c.writeEKSNATRules(iptablesData) | ||
} | ||
|
||
writeLine(iptablesData, "COMMIT") | ||
return iptablesData | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this still true? I didn't find this code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is part of the TODO paragraph. We don't implement this as the moment, we assume that
aws-node
(the AWS DaemonSet) is running with default configuration, which is likely to be the case for 99% of users.