Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Pod-to-external traffic on EKS in policyOnly mode #3975

Merged
merged 1 commit into from
Jul 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 84 additions & 27 deletions pkg/agent/route/route_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,21 +365,25 @@ func getLocalAntreaFlexibleIPAMPodIPSetName(isIPv6 bool) string {
}
}

// writeEKSMangleRule writes an additional iptables mangle rule to the
// iptablesData buffer, which is required to ensure that the reverse path for
// NodePort Service traffic is correct on EKS.
// See https://github.com/antrea-io/antrea/issues/678.
func (c *Client) writeEKSMangleRule(iptablesData *bytes.Buffer) {
// writeEKSMangleRules writes an additional iptables mangle rule to the
// iptablesData buffer, to set the traffic mark to the connection mark for
// traffic coming out of the gateway. This rule is needed for 2 cases:
// * for the reverse path for NodePort Service traffic (see
// https://github.com/antrea-io/antrea/issues/678).
// * for Pod-to-external traffic that needs to be SNATed (see
// https://github.com/antrea-io/antrea/issues/3946).
func (c *Client) writeEKSMangleRules(iptablesData *bytes.Buffer) {
// TODO: the following should be taking into account:
// 1) AWS_VPC_CNI_NODE_PORT_SUPPORT may be set to false (by default is
// true), in which case we do not need to install the rule.
// 1) this rule is only needed if AWS_VPC_CNI_NODE_PORT_SUPPORT is set
// to true (which is the default) or if AWS_VPC_K8S_CNI_EXTERNALSNAT
// is set to false (which is also the default). If both settings are
// changed, we do not need to install the rule.
// 2) this option is not documented but the mark value can be
// configured with AWS_VPC_K8S_CNI_CONNMARK.
// We could look for the rule added by AWS VPC CNI to the mangle
// table. If it does not exist, we do not need to install this rule. If
// it does exist we can scan for the mark value and use that in our
// rule.
klog.V(2).Infof("Add iptable mangle rule for EKS to ensure correct reverse path for NodePort Service traffic")
// While we do not have access to these environment variables, we could
// look for existing rules installed by the AWS VPC CNI, and determine
// whether we need to install this rule.
Comment on lines +383 to +385
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this still true? I didn't find this code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is part of the TODO paragraph. We don't implement this as the moment, we assume that aws-node (the AWS DaemonSet) is running with default configuration, which is likely to be the case for 99% of users.

klog.V(2).InfoS("Add iptables mangle rules for EKS")
writeLine(iptablesData, []string{
"-A", antreaMangleChain,
"-m", "comment", "--comment", `"Antrea: AWS, primary ENI"`,
Expand All @@ -388,6 +392,39 @@ func (c *Client) writeEKSMangleRule(iptablesData *bytes.Buffer) {
}...)
}

// writeEKSNATRules writes additional iptables nat rules to the iptablesData
// buffer. The first rule sets the connection mark for Pod-to-external traffic
// that needs to be SNATed. Without the mark, this traffic is not routed using
// the correct route table for Pods getting an IP address from a secondary
// network interface (ENI). The second rule restores the packet mark from the
// connection mark. That rule will only apply to the first packet of the
// connection. For subsequent packets, the rule installed buy writeEKSMangleRule
// will take care of restoring the mark. We need that rule because the mangle
// table is traversed before the nat table.
// See https://docs.aws.amazon.com/eks/latest/userguide/external-snat.html and
// https://github.com/antrea-io/antrea/issues/3946 for more details.
func (c *Client) writeEKSNATRules(iptablesData *bytes.Buffer) {
// TODO: just like for writeEKSMangleRule, these rules should ideally be
// installed conditionally, when AWS_VPC_K8S_CNI_EXTERNALSNAT is set to
// false (which is the default value).
klog.V(2).InfoS("Add iptables nat rules for EKS")
writeLine(iptablesData, []string{
"-A", antreaPreRoutingChain,
"-i", c.nodeConfig.GatewayConfig.Name,
"-m", "comment", "--comment", `"Antrea: AWS, outbound connections"`,
"-j", "AWS-CONNMARK-CHAIN-0",
}...)
// The AWS VPC CNI already installs the same rule in the PREROUTING
// chain. However, that rule will typically be visited before the
// ANTREA-PREROUTING chain, hence we need to install our own copy of the
// rule.
writeLine(iptablesData, []string{
"-A", antreaPreRoutingChain,
"-m", "comment", "--comment", `"Antrea: AWS, CONNMARK (first packet)"`,
"-j", "CONNMARK", "--restore-mark", "--nfmask", "0x80", "--ctmask", "0x80",
}...)
}

// syncIPTables ensure that the iptables infrastructure we use is set up.
// It's idempotent and can safely be called on every startup.
func (c *Client) syncIPTables() error {
Expand All @@ -400,21 +437,25 @@ func (c *Client) syncIPTables() error {
// Create the antrea managed chains and link them to built-in chains.
// We cannot use iptables-restore for these jump rules because there
// are non antrea managed rules in built-in chains.
jumpRules := []struct{ table, srcChain, dstChain, comment string }{
type jumpRule struct {
table string
srcChain string
dstChain string
comment string
}
jumpRules := []jumpRule{
{iptables.RawTable, iptables.PreRoutingChain, antreaPreRoutingChain, "Antrea: jump to Antrea prerouting rules"},
{iptables.RawTable, iptables.OutputChain, antreaOutputChain, "Antrea: jump to Antrea output rules"},
{iptables.FilterTable, iptables.ForwardChain, antreaForwardChain, "Antrea: jump to Antrea forwarding rules"},
{iptables.NATTable, iptables.PostRoutingChain, antreaPostRoutingChain, "Antrea: jump to Antrea postrouting rules"},
{iptables.MangleTable, iptables.PreRoutingChain, antreaMangleChain, "Antrea: jump to Antrea mangle rules"}, // TODO: unify the chain naming style
{iptables.MangleTable, iptables.OutputChain, antreaOutputChain, "Antrea: jump to Antrea output rules"},
}
if c.proxyAll || env.IsCloudEKS() {
jumpRules = append(jumpRules, jumpRule{iptables.NATTable, iptables.PreRoutingChain, antreaPreRoutingChain, "Antrea: jump to Antrea prerouting rules"})
}
if c.proxyAll {
jumpRules = append(jumpRules,
[]struct{ table, srcChain, dstChain, comment string }{
{iptables.NATTable, iptables.PreRoutingChain, antreaPreRoutingChain, "Antrea: jump to Antrea prerouting rules"},
{iptables.NATTable, iptables.OutputChain, antreaOutputChain, "Antrea: jump to Antrea output rules"},
}...,
)
jumpRules = append(jumpRules, jumpRule{iptables.NATTable, iptables.OutputChain, antreaOutputChain, "Antrea: jump to Antrea output rules"})
}
for _, rule := range jumpRules {
if err := c.ipt.EnsureChain(iptables.ProtocolDual, rule.table, rule.dstChain); err != nil {
Expand All @@ -438,6 +479,7 @@ func (c *Client) syncIPTables() error {
}
return true
})

// Use iptables-restore to configure IPv4 settings.
if c.networkConfig.IPv4Enabled {
iptablesData := c.restoreIptablesData(c.nodeConfig.PodIPv4CIDR,
Expand All @@ -446,7 +488,9 @@ func (c *Client) syncIPTables() error {
antreaNodePortIPSet,
config.VirtualNodePortDNATIPv4,
config.VirtualServiceIPv4,
snatMarkToIPv4)
snatMarkToIPv4,
false)

// Setting --noflush to keep the previous contents (i.e. non antrea managed chains) of the tables.
if err := c.ipt.Restore(iptablesData.Bytes(), false, false); err != nil {
return err
Expand All @@ -461,12 +505,14 @@ func (c *Client) syncIPTables() error {
antreaNodePortIP6Set,
config.VirtualNodePortDNATIPv6,
config.VirtualServiceIPv6,
snatMarkToIPv6)
snatMarkToIPv6,
true)
// Setting --noflush to keep the previous contents (i.e. non antrea managed chains) of the tables.
if err := c.ipt.Restore(iptablesData.Bytes(), false, true); err != nil {
return err
}
}

return nil
}

Expand All @@ -476,7 +522,8 @@ func (c *Client) restoreIptablesData(podCIDR *net.IPNet,
nodePortIPSet string,
nodePortDNATVirtualIP,
serviceVirtualIP net.IP,
snatMarkToIP map[uint32]net.IP) *bytes.Buffer {
snatMarkToIP map[uint32]net.IP,
isIPv6 bool) *bytes.Buffer {
// Create required rules in the antrea chains.
// Use iptables-restore as it flushes the involved chains and creates the desired rules
// with a single call, instead of string matching to clean up stale rules.
Expand Down Expand Up @@ -520,10 +567,11 @@ func (c *Client) restoreIptablesData(podCIDR *net.IPNet,
writeLine(iptablesData, iptables.MakeChainLine(antreaMangleChain))
writeLine(iptablesData, iptables.MakeChainLine(antreaOutputChain))

// When Antrea is used to enforce NetworkPolicies in EKS, an additional iptables
// mangle rule is required. See https://github.com/antrea-io/antrea/issues/678.
if env.IsCloudEKS() {
c.writeEKSMangleRule(iptablesData)
// When Antrea is used to enforce NetworkPolicies in EKS, additional iptables
// mangle rules are required. See https://github.com/antrea-io/antrea/issues/678.
// These rules are only needed for IPv4.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it have NodePort IPv6 case? If yes, is the rule still needed for IPv6, otherwise reverse path may be different?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, I think you're right. This one should always be installed.
Note that we don't test Antrea on EKS with IPv6 enabled, so there may be other issues, but I'll fix that one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually scratch that. The AWS rules are only installed for IPv4: https://github.com/aws/amazon-vpc-cni-k8s/blob/d43309bdfdb5034df86907944e682d78608ba165/pkg/networkutils/network.go#L402-L418. Not entirely sure why, but I would think that routing is configured differently for IPv6, and these rules are not needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is this comment:

		//Essentially a stub function for now in V6 mode. We will need it when we support v6 in secondary IP and
		//custom networking modes. We don't need to install any SNAT rules in v6 mode and currently there is no need
		//to mark packets entering via Primary ENI as all the pods in v6 mode will be behind primary ENI. Will have to
		//start doing that once we start supporting custom networking mode in v6.

So that explains why there is no need for the rule: only the primary ENI is used, with no secondary route table.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got it, thanks

if env.IsCloudEKS() && !isIPv6 {
c.writeEKSMangleRules(iptablesData)
}

// To make liveness/readiness probe traffic bypass ingress rules of Network Policies, mark locally generated packets
Expand Down Expand Up @@ -582,8 +630,10 @@ func (c *Client) restoreIptablesData(podCIDR *net.IPNet,
writeLine(iptablesData, "COMMIT")

writeLine(iptablesData, "*nat")
if c.proxyAll {
if c.proxyAll || env.IsCloudEKS() {
writeLine(iptablesData, iptables.MakeChainLine(antreaPreRoutingChain))
}
if c.proxyAll {
writeLine(iptablesData, []string{
"-A", antreaPreRoutingChain,
"-m", "comment", "--comment", `"Antrea: DNAT external to NodePort packets"`,
Expand Down Expand Up @@ -684,6 +734,13 @@ func (c *Client) restoreIptablesData(podCIDR *net.IPNet,
}...)
}

// When Antrea is used to enforce NetworkPolicies in EKS, additional iptables
// nat rules are required. See https://github.com/antrea-io/antrea/issues/3946.
// These rules are only needed for IPv4.
if env.IsCloudEKS() && !isIPv6 {
c.writeEKSNATRules(iptablesData)
}

writeLine(iptablesData, "COMMIT")
return iptablesData
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/agent/util/iptables/iptables.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ func (c *Client) AppendRule(protocol Protocol, table string, chain string, ruleS
return nil
}

// InsertRule checks if target rule already exists, inserts it if not.
// InsertRule checks if target rule already exists, inserts it at the beginning of the chain if not.
func (c *Client) InsertRule(protocol Protocol, table string, chain string, ruleSpec []string) error {
for p := range c.ipts {
ipt := c.ipts[p]
Expand All @@ -190,7 +190,7 @@ func (c *Client) InsertRule(protocol Protocol, table string, chain string, ruleS
if err := ipt.Insert(table, chain, 1, ruleSpec...); err != nil {
return fmt.Errorf("error inserting rule %v to table %s chain %s: %v", ruleSpec, table, chain, err)
}
klog.V(2).InfoS("Inserted a rule", "rule", ruleSpec, "table", table, "chain", chain)
klog.V(2).InfoS("Inserted a rule", "rule", ruleSpec, "table", table, "chain", chain, "index", 1)
}
return nil
}
Expand Down