From 7c3f7f415f1de65dfaacb86836de51cb85bfb0b0 Mon Sep 17 00:00:00 2001 From: Ales Musil Date: Wed, 29 Jan 2025 19:47:45 +0100 Subject: [PATCH] northd, controller: Flood ARP and NA packet on transit router. When a packet goes between AZs through a transit router for the first time, there isn't any MAC binding for the remote port equivalent. The TR will properly generate an ARP/ND NS packet that will arrive at the remote AZ; however, the response would never leave the remote AZ, and as a consequence the local AZ would never learn this MAC binding. To prevent the described behavior, add a new table that contains all remote chassis and the corresponding encapsulations, which allows us to flood all remote chassis with any packet that is sent to this table. At the same time, add a new action that sends the packet to this table. In order to properly generate the MAC binding, we need to redirect the ARP into ingress instead of egress, which is the usual path for packets received from tunnels. Add flows that match on ARP and ND NA in combination with outport 0, which should indicate that this is the remote flood flow. The only exception is VXLAN, which doesn't have enough space for outport encoding; in that case we need to send the packet to both ingress and egress, as we cannot determine whether it was part of the remote flood or a regular packet that arrived from another chassis in the same AZ. 
Signed-off-by: Ales Musil Acked-by: Lorenzo Bianconi Tested-by: Enrique Llorente Signed-off-by: Dumitru Ceara --- controller/lflow.c | 1 + controller/lflow.h | 4 + controller/physical.c | 198 +++++++++++++++++++++++++++++++++---- include/ovn/actions.h | 3 + lib/actions.c | 17 ++++ northd/northd.c | 12 ++- tests/multinode-macros.at | 48 +++++++++ tests/multinode.at | 201 ++++++++++++++++++++++++++++++++++++++ tests/ovn-controller.at | 63 ++++++++++++ tests/ovn-macros.at | 1 + tests/ovn.at | 10 +- tests/test-ovn.c | 1 + utilities/ovn-trace.c | 3 + 13 files changed, 538 insertions(+), 24 deletions(-) diff --git a/controller/lflow.c b/controller/lflow.c index a77d07d225..c2d280d5b6 100644 --- a/controller/lflow.c +++ b/controller/lflow.c @@ -898,6 +898,7 @@ add_matches_to_flow_table(const struct sbrec_logical_flow *lflow, .ct_nw_dst_load_table = OFTABLE_CT_ORIG_NW_DST_LOAD, .ct_ip6_dst_load_table = OFTABLE_CT_ORIG_IP6_DST_LOAD, .ct_tp_dst_load_table = OFTABLE_CT_ORIG_TP_DST_LOAD, + .flood_remote_table = OFTABLE_FLOOD_REMOTE_CHASSIS, .ctrl_meter_id = ctrl_meter_id, .common_nat_ct_zone = get_common_nat_zone(ldp), }; diff --git a/controller/lflow.h b/controller/lflow.h index 93a9f3b7e7..ab026e3bdd 100644 --- a/controller/lflow.h +++ b/controller/lflow.h @@ -98,6 +98,10 @@ struct uuid; #define OFTABLE_CT_ORIG_NW_DST_LOAD 81 #define OFTABLE_CT_ORIG_IP6_DST_LOAD 82 #define OFTABLE_CT_ORIG_TP_DST_LOAD 83 +#define OFTABLE_FLOOD_REMOTE_CHASSIS 84 + +/* Common defines shared between some controller components. 
*/ +#define CHASSIS_FLOOD_INDEX_START 0x8000 struct lflow_ctx_in { diff --git a/controller/physical.c b/controller/physical.c index bbc97ee309..5d088302a3 100644 --- a/controller/physical.c +++ b/controller/physical.c @@ -185,6 +185,84 @@ put_encapsulation(enum mf_field_id mff_ovn_geneve, } } +static void +put_decapsulation(enum mf_field_id mff_ovn_geneve, + const struct chassis_tunnel *tun, + struct ofpbuf *ofpacts) +{ + if (tun->type == GENEVE) { + put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, ofpacts); + put_move(mff_ovn_geneve, 16, MFF_LOG_INPORT, 0, 15, ofpacts); + put_move(mff_ovn_geneve, 0, MFF_LOG_OUTPORT, 0, 16, ofpacts); + } else if (tun->type == STT) { + put_move(MFF_TUN_ID, 40, MFF_LOG_INPORT, 0, 15, ofpacts); + put_move(MFF_TUN_ID, 24, MFF_LOG_OUTPORT, 0, 16, ofpacts); + put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, ofpacts); + } else if (tun->type == VXLAN) { + /* Add flows for non-VTEP tunnels. Split VNI into two 12-bit + * sections and use them for datapath and outport IDs. 
*/ + put_move(MFF_TUN_ID, 12, MFF_LOG_OUTPORT, 0, 12, ofpacts); + put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 12, ofpacts); + } else { + OVS_NOT_REACHED(); + } +} + + +static void +put_remote_chassis_flood_encap(struct ofpbuf *ofpacts, + enum chassis_tunnel_type type, + enum mf_field_id mff_ovn_geneve) +{ + if (type == GENEVE) { + put_move(MFF_LOG_DATAPATH, 0, MFF_TUN_ID, 0, 24, ofpacts); + put_load(0, mff_ovn_geneve, 0, 32, ofpacts); + put_move(MFF_LOG_INPORT, 0, mff_ovn_geneve, 16, 15, ofpacts); + } else if (type == STT) { + put_move(MFF_LOG_INPORT, 0, MFF_TUN_ID, 40, 15, ofpacts); + put_load(0, MFF_TUN_ID, 24, 16, ofpacts); + put_move(MFF_LOG_DATAPATH, 0, MFF_TUN_ID, 0, 24, ofpacts); + } else if (type == VXLAN) { + put_move(MFF_LOG_INPORT, 0, MFF_TUN_ID, 12, 12, ofpacts); + put_move(MFF_LOG_DATAPATH, 0, MFF_TUN_ID, 0, 12, ofpacts); + } else { + OVS_NOT_REACHED(); + } +} + +static void +match_set_chassis_flood_outport(struct match *match, + enum chassis_tunnel_type type, + enum mf_field_id mff_ovn_geneve) +{ + if (type == GENEVE) { + /* Outport occupies the lower half of tunnel metadata (0-15). */ + union mf_value value, mask; + memset(&value, 0, sizeof value); + memset(&mask, 0, sizeof mask); + + const struct mf_field *mf_ovn_geneve = mf_from_id(mff_ovn_geneve); + memset(&mask.tun_metadata[mf_ovn_geneve->n_bytes - 2], 0xff, 2); + + tun_metadata_set_match(mf_ovn_geneve, &value, &mask, match, NULL); + } else if (type == STT) { + /* Outport occupies bits 24-39. */ + match_set_tun_id_masked(match, 0, htonll(UINT64_C(0xffff) << 24)); + } +} + +static void +match_set_chassis_flood_remote(struct match *match, uint32_t index) +{ + match_init_catchall(match); + match_set_reg(match, MFF_REG6 - MFF_REG0, index); + /* Match if the packet wasn't already received from tunnel. + * This prevents from looping it back to the tunnel again. 
*/ + match_set_reg_masked(match, MFF_LOG_FLAGS - MFF_REG0, 0, + MLF_RX_FROM_TUNNEL); +} + + static void put_stack(enum mf_field_id field, struct ofpact_stack *stack) { @@ -2367,6 +2445,105 @@ consider_mc_group(const struct physical_ctx *ctx, sset_destroy(&vtep_chassis); } +#define CHASSIS_FLOOD_MAX_MSG_SIZE MC_OFPACTS_MAX_MSG_SIZE + +static void +physical_eval_remote_chassis_flows(const struct physical_ctx *ctx, + struct ofpbuf *egress_ofpacts, + struct ovn_desired_flow_table *flow_table) +{ + struct match match = MATCH_CATCHALL_INITIALIZER; + uint32_t index = CHASSIS_FLOOD_INDEX_START; + struct chassis_tunnel *prev = NULL; + + uint8_t actions_stub[256]; + struct ofpbuf ingress_ofpacts; + ofpbuf_use_stub(&ingress_ofpacts, actions_stub, sizeof(actions_stub)); + + ofpbuf_clear(egress_ofpacts); + + const struct sbrec_chassis *chassis; + SBREC_CHASSIS_TABLE_FOR_EACH (chassis, ctx->chassis_table) { + if (!smap_get_bool(&chassis->other_config, "is-remote", false)) { + continue; + } + + struct chassis_tunnel *tun = + chassis_tunnel_find(ctx->chassis_tunnels, chassis->name, + NULL, NULL); + if (!tun) { + continue; + } + + if (!(prev && prev->type == tun->type)) { + put_remote_chassis_flood_encap(egress_ofpacts, tun->type, + ctx->mff_ovn_geneve); + } + + ofpact_put_OUTPUT(egress_ofpacts)->port = tun->ofport; + prev = tun; + + if (egress_ofpacts->size > CHASSIS_FLOOD_MAX_MSG_SIZE) { + match_set_chassis_flood_remote(&match, index++); + put_split_buf_function(index, 0, OFTABLE_FLOOD_REMOTE_CHASSIS, + egress_ofpacts); + + ofctrl_add_flow(flow_table, OFTABLE_FLOOD_REMOTE_CHASSIS, 100, 0, + &match, egress_ofpacts, hc_uuid); + + ofpbuf_clear(egress_ofpacts); + prev = NULL; + } + + + ofpbuf_clear(&ingress_ofpacts); + put_load(1, MFF_LOG_FLAGS, MLF_RX_FROM_TUNNEL_BIT, 1, + &ingress_ofpacts); + put_decapsulation(ctx->mff_ovn_geneve, tun, &ingress_ofpacts); + put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &ingress_ofpacts); + if (tun->type == VXLAN) { + /* VXLAN doesn't carry the inport 
information, we cannot set + * the outport to 0 then and match on it. */ + put_resubmit(OFTABLE_LOCAL_OUTPUT, &ingress_ofpacts); + } + + /* Add match on ARP response coming from remote chassis. */ + match_init_catchall(&match); + match_set_in_port(&match, tun->ofport); + match_set_dl_type(&match, htons(ETH_TYPE_ARP)); + match_set_arp_opcode_masked(&match, 2, UINT8_MAX); + match_set_chassis_flood_outport(&match, tun->type, + ctx->mff_ovn_geneve); + + ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, + chassis->header_.uuid.parts[0], + &match, &ingress_ofpacts, hc_uuid); + + /* Add match on ND NA coming from remote chassis. */ + match_init_catchall(&match); + match_set_in_port(&match, tun->ofport); + match_set_dl_type(&match, htons(ETH_TYPE_IPV6)); + match_set_nw_proto(&match, IPPROTO_ICMPV6); + match_set_icmp_type(&match, 136); + match_set_icmp_code(&match, 0); + match_set_chassis_flood_outport(&match, tun->type, + ctx->mff_ovn_geneve); + + ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 120, + chassis->header_.uuid.parts[0], + &match, &ingress_ofpacts, hc_uuid); + } + + if (egress_ofpacts->size > 0) { + match_set_chassis_flood_remote(&match, index++); + + ofctrl_add_flow(flow_table, OFTABLE_FLOOD_REMOTE_CHASSIS, 100, 0, + &match, egress_ofpacts, hc_uuid); + } + + ofpbuf_uninit(&ingress_ofpacts); +} + static void physical_eval_port_binding(struct physical_ctx *p_ctx, const struct sbrec_port_binding *pb, @@ -2531,24 +2708,7 @@ physical_run(struct physical_ctx *p_ctx, match_set_in_port(&match, tun->ofport); ofpbuf_clear(&ofpacts); - if (tun->type == GENEVE) { - put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 24, &ofpacts); - put_move(p_ctx->mff_ovn_geneve, 16, MFF_LOG_INPORT, 0, 15, - &ofpacts); - put_move(p_ctx->mff_ovn_geneve, 0, MFF_LOG_OUTPORT, 0, 16, - &ofpacts); - } else if (tun->type == STT) { - put_move(MFF_TUN_ID, 40, MFF_LOG_INPORT, 0, 15, &ofpacts); - put_move(MFF_TUN_ID, 24, MFF_LOG_OUTPORT, 0, 16, &ofpacts); - put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 
24, &ofpacts); - } else if (tun->type == VXLAN) { - /* Add flows for non-VTEP tunnels. Split VNI into two 12-bit - * sections and use them for datapath and outport IDs. */ - put_move(MFF_TUN_ID, 12, MFF_LOG_OUTPORT, 0, 12, &ofpacts); - put_move(MFF_TUN_ID, 0, MFF_LOG_DATAPATH, 0, 12, &ofpacts); - } else { - OVS_NOT_REACHED(); - } + put_decapsulation(p_ctx->mff_ovn_geneve, tun, &ofpacts); put_resubmit(OFTABLE_LOCAL_OUTPUT, &ofpacts); ofctrl_add_flow(flow_table, OFTABLE_PHY_TO_LOG, 100, 0, &match, @@ -2800,5 +2960,7 @@ physical_run(struct physical_ctx *p_ctx, ofctrl_add_flow(flow_table, OFTABLE_CT_ORIG_IP6_DST_LOAD, 100, 0, &match, &ofpacts, hc_uuid); + physical_eval_remote_chassis_flows(p_ctx, &ofpacts, flow_table); + ofpbuf_uninit(&ofpacts); } diff --git a/include/ovn/actions.h b/include/ovn/actions.h index 0e2186d6dc..93edb6ea6a 100644 --- a/include/ovn/actions.h +++ b/include/ovn/actions.h @@ -134,6 +134,7 @@ struct collector_set_ids; OVNACT(CT_ORIG_NW_DST, ovnact_result) \ OVNACT(CT_ORIG_IP6_DST, ovnact_result) \ OVNACT(CT_ORIG_TP_DST, ovnact_result) \ + OVNACT(FLOOD_REMOTE, ovnact_null) \ /* enum ovnact_type, with a member OVNACT_ for each action. */ enum OVS_PACKED_ENUM ovnact_type { @@ -948,6 +949,8 @@ struct ovnact_encode_params { * to resubmit. */ uint32_t ct_tp_dst_load_table; /* OpenFlow table for 'ct_tp_dst' * to resubmit. */ + uint32_t flood_remote_table; /* OpenFlow table for 'flood_remote' + * to resubmit. 
*/ }; void ovnacts_encode(const struct ovnact[], size_t ovnacts_len, diff --git a/lib/actions.c b/lib/actions.c index 44ed39d92e..7ec481e006 100644 --- a/lib/actions.c +++ b/lib/actions.c @@ -5533,6 +5533,21 @@ format_CT_ORIG_TP_DST(const struct ovnact_result *res, struct ds *s) ds_put_cstr(s, " = ct_tp_dst();"); } +static void +format_FLOOD_REMOTE(const struct ovnact_null *null OVS_UNUSED, struct ds *s) +{ + ds_put_cstr(s, "flood_remote;"); +} + +static void +encode_FLOOD_REMOTE(const struct ovnact_null *null OVS_UNUSED, + const struct ovnact_encode_params *ep, + struct ofpbuf *ofpacts) +{ + put_load(CHASSIS_FLOOD_INDEX_START, MFF_REG6, 0, 32, ofpacts); + emit_resubmit(ofpacts, ep->flood_remote_table); +} + /* Parses an assignment or exchange or put_dhcp_opts action. */ static void parse_set_action(struct action_context *ctx) @@ -5760,6 +5775,8 @@ parse_action(struct action_context *ctx) parse_sample(ctx); } else if (lexer_match_id(ctx->lexer, "mac_cache_use")) { ovnact_put_MAC_CACHE_USE(ctx->ovnacts); + } else if (lexer_match_id(ctx->lexer, "flood_remote")) { + ovnact_put_FLOOD_REMOTE(ctx->ovnacts); } else { lexer_syntax_error(ctx->lexer, "expecting action"); } diff --git a/northd/northd.c b/northd/northd.c index 4abe2e9a8c..587fcd586f 100644 --- a/northd/northd.c +++ b/northd/northd.c @@ -13148,21 +13148,22 @@ build_neigh_learning_flows_for_lrouter( * */ /* Flows for LOOKUP_NEIGHBOR. */ + const char *flood = od->is_transit_router ? "flood_remote; " : ""; bool learn_from_arp_request = smap_get_bool(&od->nbr->options, "always_learn_from_arp_request", true); ds_clear(actions); ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT - " = lookup_arp(inport, arp.spa, arp.sha); %snext;", + " = lookup_arp(inport, arp.spa, arp.sha); %s%snext;", learn_from_arp_request ? 
"" : - REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; "); + REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ", flood); ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "arp.op == 2", ds_cstr(actions), lflow_ref); ds_clear(actions); ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT - " = lookup_nd(inport, nd.target, nd.tll); %snext;", + " = lookup_nd(inport, nd.target, nd.tll); %s%snext;", learn_from_arp_request ? "" : - REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; "); + REGBIT_LOOKUP_NEIGHBOR_IP_RESULT" = 1; ", flood); ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 100, "nd_na", ds_cstr(actions), lflow_ref); @@ -13178,7 +13179,8 @@ build_neigh_learning_flows_for_lrouter( ds_put_format(actions, REGBIT_LOOKUP_NEIGHBOR_RESULT " = lookup_nd(inport, nd.target, nd.tll); " REGBIT_LOOKUP_NEIGHBOR_IP_RESULT - " = lookup_nd_ip(inport, nd.target); next;"); + " = lookup_nd_ip(inport, nd.target); %snext;", + flood); ovn_lflow_add(lflows, od, S_ROUTER_IN_LOOKUP_NEIGHBOR, 110, "nd_na && ip6.src == fe80::/10 && ip6.dst == ff00::/8", ds_cstr(actions), lflow_ref); diff --git a/tests/multinode-macros.at b/tests/multinode-macros.at index 698d2c6250..29f0711e64 100644 --- a/tests/multinode-macros.at +++ b/tests/multinode-macros.at @@ -112,6 +112,54 @@ cleanup_multinode_resources_by_nodes() { done } +# multinode_cleanup_northd NODE +# +# Removes previously set up northd on the specified node. +multinode_cleanup_northd() { + c=$1 + # Cleanup existing one + m_as $c /usr/share/ovn/scripts/ovn-ctl stop_northd + m_as $c rm -f /etc/ovn/*.db +} + +# multinode_setup_northd NODE +# +# Sets up northd on specified node. +multinode_setup_northd() { + c=$1 + + multinode_cleanup_northd $c + + m_as $c /usr/share/ovn/scripts/ovn-ctl start_northd + m_as $c ovn-nbctl set-connection ptcp:6641 + m_as $c ovn-sbctl set-connection ptcp:6642 +} + +# multinode_setup_controller NODE IGNORED ENCAP_IP REMOTE_IP [ENCAP_TYPE] +# +# Sets up controller on specified node. 
+multinode_setup_controller() { + c=$1 + encap_ip=$3 + remote_ip=$4 + encap_type=${5:-"geneve"} + + # Cleanup existing one + m_as $c /usr/share/openvswitch/scripts/ovs-ctl stop + m_as $c /usr/share/ovn/scripts/ovn-ctl stop_controller + m_as $c rm -f /etc/openvswitch/*.db + + m_as $c /usr/share/openvswitch/scripts/ovs-ctl start --system-id=$c + m_as $c /usr/share/ovn/scripts/ovn-ctl start_controller + + m_as $c ovs-vsctl set open . external_ids:ovn-encap-ip=$encap_ip + m_as $c ovs-vsctl set open . external-ids:ovn-encap-type=$encap_type + m_as $c ovs-vsctl set open . external-ids:ovn-remote=tcp:$remote_ip:6642 + m_as $c ovs-vsctl set open . external-ids:ovn-openflow-probe-interval=60 + m_as $c ovs-vsctl set open . external-ids:ovn-remote-probe-interval=180000 + m_as $c ovs-vsctl set open . external-ids:ovn-bridge-datapath-type=system +} + # m_count_rows TABLE [CONDITION...] # # Prints the number of rows in TABLE (that satisfy CONDITION). diff --git a/tests/multinode.at b/tests/multinode.at index 9602358aae..c1bd3123ac 100644 --- a/tests/multinode.at +++ b/tests/multinode.at @@ -2575,3 +2575,204 @@ Connected to 10.0.2.4 (10.0.2.4) port 8080 fi AT_CLEANUP + +AT_SETUP([ovn multinode - Transit Router basic functionality]) + +# Check that ovn-fake-multinode setup is up and running +check_fake_multinode_setup + +# Delete the multinode NB and OVS resources before starting the test. +cleanup_multinode_resources + +# Network topology +# ┌─────────────────────────────────┐ ┌────────────────────────────────┐ +# │ │ │ │ +# │ ┌───────────────────┐ AZ1 │ │ AZ2 ┌───────────────────┐ │ +# │ │ external │ │ │ │ │ │ +# │ │ │ │ │ │ │ │ +# │ │ 192.168.100.10/24 │ │ │ │ ................. 
│ │ +# │ │ 1000::10/64 │ │ │ │ │ │ +# │ └─────────┬─────────┘ │ │ └─────────┬─────────┘ │ +# │ │ │ │ │ │ +# │ │ │ │ │ │ +# │ ┌─────────┴─────────┐ │ │ ┌─────────┴─────────┐ │ +# │ │ 192.168.100.1/24 │ │ │ │ 192.168.100.2/24 │ │ +# │ │ 1000::1/64 │ │ │ │ 1000::2/64 │ │ +# │ │ │ │ │ │ │ │ +# │ │ GW │ │ │ │ GW │ │ +# │ │ │ │ │ │ │ │ +# │ │ 100.65.0.1/30 │ │ │ │ 100.65.0.5/30 │ │ +# │ │ 100:65::1/126 │ │ │ │ 100:65::5/126 │ │ +# │ └─────────┬─────────┘ │ │ └───────────────────┘ │ +# │ │ │ │ │ │ +# │ │ Peer ports │ │ │ Peer ports │ +# │ │ │ │ │ │ +# │ ┌─────────┴──────────────────│─────│──────────────────┴─────────┐ │ +# │ │ 100.65.0.2/30 │ │ 100.65.0.6/30 │ │ +# │ │ 100:65::2/126 │ │ 100:65::6/126 │ │ +# │ │ │ │ │ │ +# │ │ │ TR │ │ │ +# │ │ │ │ │ │ +# │ │ 10.100.200.1/24 │ │ 10.100.200.1/24 │ │ +# │ │ 10:200::1/64 │ │ 10:200::1/64 │ │ +# │ └─────────┬──────────────────│─────│────────────────────────────┘ │ +# │ │ │ │ │ │ +# │ │ │ │ │ │ +# │ │ │ │ │ │ +# │ ┌─────────┴──────────────────│─────│────────────────────────────┐ │ +# │ │ │ TS │ │ │ +# │ └─────────┬──────────────────│─────│────────────────────────────┘ │ +# │ │ │ │ │ │ +# │ │ │ │ │ │ +# │ │ │ │ │ │ +# │ ┌─────────┴─────────┐ │ │ ┌─────────┴─────────┐ │ +# │ │ pod10 │ │ │ │ pod20 │ │ +# │ │ │ │ │ │ │ │ +# │ │ 10.100.200.10/24 │ │ │ │ 10.100.200.20/24 │ │ +# │ │ 10:200::10/64 │ │ │ │ 10:200::20/64 │ │ +# │ └───────────────────┘ │ │ └───────────────────┘ │ +# └─────────────────────────────────┘ └────────────────────────────────┘ + +for i in 1 2; do + chassis="ovn-chassis-$i" + ip=$(m_as $chassis ip -4 addr show eth1 | grep inet | awk '{print $2}' | cut -d'/' -f1) + + multinode_setup_northd $chassis + multinode_setup_controller $chassis $chassis $ip $ip + + check m_as $chassis ovs-vsctl set open . external_ids:ovn-monitor-all=true + check m_as $chassis ovs-vsctl set open . 
external_ids:ovn-is-interconn=true + + check m_as $chassis ovn-nbctl ls-add public + + check m_as $chassis ovn-nbctl lsp-add public public-gw + check m_as $chassis ovn-nbctl lsp-set-type public-gw router + check m_as $chassis ovn-nbctl lsp-set-addresses public-gw router + check m_as $chassis ovn-nbctl lsp-set-options public-gw router-port=gw-public + + check m_as $chassis ovn-nbctl lr-add gw + check m_as $chassis ovn-nbctl lrp-add gw gw-public 00:00:00:00:20:00 192.168.100.$i/24 1000::$i/64 + + check m_as $chassis ovn-nbctl set logical_router gw options:chassis=$chassis + + # Add TR and set the same tunnel key for both chassis + check m_as $chassis ovn-nbctl ls-add ts + check m_as $chassis ovn-nbctl set logical_switch ts other_config:requested-tnl-key=10 + + check m_as $chassis ovn-nbctl lsp-add ts ts-tr + check m_as $chassis ovn-nbctl lsp-set-type ts-tr router + check m_as $chassis ovn-nbctl lsp-set-addresses ts-tr router + check m_as $chassis ovn-nbctl lsp-set-options ts-tr router-port=tr-ts + + check m_as $chassis ovn-nbctl lr-add tr + check m_as $chassis ovn-nbctl lrp-add tr tr-ts 00:00:00:00:10:00 10.100.200.1/24 10:200::1/64 + check m_as $chassis ovn-nbctl set logical_router tr options:requested-tnl-key=20 + + # Add TS pods, with the same tunnel keys on both sides + check m_as $chassis ovn-nbctl lsp-add ts pod10 + check m_as $chassis ovn-nbctl lsp-set-addresses pod10 "00:00:00:00:10:10 10.100.200.10 10:200::10" + check m_as $chassis ovn-nbctl set logical_switch_port pod10 options:requested-tnl-key=10 + + check m_as $chassis ovn-nbctl lsp-add ts pod20 + check m_as $chassis ovn-nbctl lsp-set-addresses pod20 "00:00:00:00:10:20 10.100.200.20 10:200::20" + check m_as $chassis ovn-nbctl set logical_switch_port pod20 options:requested-tnl-key=20 +done + +# Add SNAT for the GW router that corresponds to "gw-tr" LRP IP +check m_as ovn-chassis-1 ovn-nbctl lr-nat-add gw snat 100.65.0.1 192.168.100.0/24 +check m_as ovn-chassis-1 ovn-nbctl lr-nat-add gw snat 100:65::1 
1000::/64 +check m_as ovn-chassis-2 ovn-nbctl lr-nat-add gw snat 100.65.0.5 192.168.100.0/24 +check m_as ovn-chassis-2 ovn-nbctl lr-nat-add gw snat 100:65::5 1000::/64 + +# Add peer ports between GW and TR +check m_as ovn-chassis-1 ovn-nbctl lrp-add gw gw-tr 00:00:00:00:30:01 100.65.0.1/30 100:65::1/126 peer=tr-gw +check m_as ovn-chassis-1 ovn-nbctl lrp-add tr tr-gw 00:00:00:00:30:02 100.65.0.2/30 100:65::2/126 peer=gw-tr + +check m_as ovn-chassis-2 ovn-nbctl lrp-add gw gw-tr 00:00:00:00:30:05 100.65.0.5/30 100:65::5/126 peer=tr-gw +check m_as ovn-chassis-2 ovn-nbctl lrp-add tr tr-gw 00:00:00:00:30:06 100.65.0.6/30 100:65::6/126 peer=gw-tr + +# Add routes for the TS subnet +check m_as ovn-chassis-1 ovn-nbctl lr-route-add gw 10.100.200.0/24 100.65.0.2 +check m_as ovn-chassis-1 ovn-nbctl lr-route-add gw 10:200::/64 100:65::2 +check m_as ovn-chassis-2 ovn-nbctl lr-route-add gw 10.100.200.0/24 100.65.0.6 +check m_as ovn-chassis-2 ovn-nbctl lr-route-add gw 10:200::/64 100:65::6 + +# Add mutual remote ports +check m_as ovn-chassis-1 ovn-nbctl lrp-add tr tr-az2 00:00:00:00:30:06 100.65.0.6/30 100:65::6/126 +check m_as ovn-chassis-1 ovn-nbctl set logical_router_port tr-az2 options:requested-chassis=ovn-chassis-2 + +check m_as ovn-chassis-2 ovn-nbctl lrp-add tr tr-az1 00:00:00:00:30:02 100.65.0.2/30 100:65::2/126 +check m_as ovn-chassis-2 ovn-nbctl set logical_router_port tr-az1 options:requested-chassis=ovn-chassis-1 + +# Important set the proper tunnel keys +check m_as ovn-chassis-1 ovn-nbctl set logical_router_port tr-gw options:requested-tnl-key=10 +check m_as ovn-chassis-1 ovn-nbctl set logical_router_port tr-az2 options:requested-tnl-key=20 + +check m_as ovn-chassis-2 ovn-nbctl set logical_router_port tr-gw options:requested-tnl-key=20 +check m_as ovn-chassis-2 ovn-nbctl set logical_router_port tr-az1 options:requested-tnl-key=10 + +check m_as ovn-chassis-1 ovn-nbctl lsp-add public external +check m_as ovn-chassis-1 ovn-nbctl lsp-set-addresses external 
"00:00:00:00:20:10 192.168.100.10 1000::10" + +# Add mutual chassis +check m_as ovn-chassis-1 ovn-sbctl chassis-add ovn-chassis-2 geneve $(m_as ovn-chassis-2 ip -4 addr show eth1 | grep inet | awk '{print $2}' | cut -d'/' -f1) +check m_as ovn-chassis-1 ovn-sbctl set chassis ovn-chassis-2 other_config:is-remote=true + +check m_as ovn-chassis-2 ovn-sbctl chassis-add ovn-chassis-1 geneve $(m_as ovn-chassis-1 ip -4 addr show eth1 | grep inet | awk '{print $2}' | cut -d'/' -f1) +check m_as ovn-chassis-2 ovn-sbctl set chassis ovn-chassis-1 other_config:is-remote=true + +# Configure ports on the transit switch as remotes +check m_as ovn-chassis-1 ovn-nbctl lsp-set-type pod20 remote +check m_as ovn-chassis-1 ovn-nbctl lsp-set-options pod10 requested-chassis=ovn-chassis-1 +check m_as ovn-chassis-1 ovn-nbctl lsp-set-options pod20 requested-chassis=ovn-chassis-2 + +check m_as ovn-chassis-2 ovn-nbctl lsp-set-type pod10 remote +check m_as ovn-chassis-2 ovn-nbctl lsp-set-options pod10 requested-chassis=ovn-chassis-1 +check m_as ovn-chassis-2 ovn-nbctl lsp-set-options pod20 requested-chassis=ovn-chassis-2 + +m_as ovn-chassis-1 /data/create_fake_vm.sh external external 00:00:00:00:20:10 1500 192.168.100.10 24 192.168.100.1 1000::10/64 1000::1 +m_as ovn-chassis-1 /data/create_fake_vm.sh pod10 pod10 00:00:00:00:10:10 1500 10.100.200.10 24 10.100.200.1 10:200::10/64 10:200::1 +m_as ovn-chassis-2 /data/create_fake_vm.sh pod20 pod20 00:00:00:00:10:20 1500 10.100.200.20 24 10.100.200.1 10:200::20/64 10:200::1 + +# We cannot use any of the helpers as they assume that there is only single ovn-northd instance running +check m_as ovn-chassis-1 ovn-nbctl --wait=hv sync +OVS_WAIT_UNTIL([test -n "$(m_as ovn-chassis-1 ovn-sbctl --bare --columns _uuid find Port_Binding logical_port=external up=true)"]) +OVS_WAIT_UNTIL([test -n "$(m_as ovn-chassis-1 ovn-sbctl --bare --columns _uuid find Port_Binding logical_port=pod10 up=true)"]) +check m_as ovn-chassis-2 ovn-nbctl --wait=hv sync 
+OVS_WAIT_UNTIL([test -n "$(m_as ovn-chassis-2 ovn-sbctl --bare --columns _uuid find Port_Binding logical_port=pod20 up=true)"]) + +M_NS_CHECK_EXEC([ovn-chassis-1], [external], [ping -q -c 5 -i 0.3 -w 2 10.100.200.20 | FORMAT_PING], \ +[0], [dnl +5 packets transmitted, 5 received, 0% packet loss, time 0ms +]) + +M_NS_CHECK_EXEC([ovn-chassis-1], [external], [ping -q -c 5 -i 0.3 -w 2 10:200::20 | FORMAT_PING], \ +[0], [dnl +5 packets transmitted, 5 received, 0% packet loss, time 0ms +]) + +check test $(m_as ovn-chassis-1 grep -c "skipping output to input port" \ + /var/log/openvswitch/ovs-vswitchd.log) -eq 0 +check test $(m_as ovn-chassis-2 grep -c "skipping output to input port" \ + /var/log/openvswitch/ovs-vswitchd.log) -eq 0 + +echo "Chassis1" +m_as ovn-chassis-1 ovn-sbctl show +m_as ovn-chassis-1 ovn-nbctl show +m_as ovn-chassis-1 ovs-vsctl show + +echo "Chassis2" +m_as ovn-chassis-2 ovn-sbctl show +m_as ovn-chassis-2 ovn-nbctl show +m_as ovn-chassis-2 ovs-vsctl show + +# Connect the chassis back to the original northd and remove northd per chassis. +for i in 1 2; do + chassis="ovn-chassis-$i" + ip=$(m_as $chassis ip -4 addr show eth1 | grep inet | awk '{print $2}' | cut -d'/' -f1) + + multinode_setup_controller $chassis $chassis $ip "170.168.0.2" + multinode_cleanup_northd $chassis +done + +AT_CLEANUP diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at index b177786b1f..07a1aea7ad 100644 --- a/tests/ovn-controller.at +++ b/tests/ovn-controller.at @@ -3647,3 +3647,66 @@ AT_CHECK([grep -c "cookie=$lr1_peer_cookie," log_to_phy_flows], [0], [dnl OVN_CLEANUP([hv1]) AT_CLEANUP + +AT_SETUP([Remote chassis flood flows]) +ovn_start + +net_add n1 +sim_add hv1 +as hv1 +check ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.11 24 geneve,vxlan + +check ovs-vsctl set open . 
external_ids:ovn-is-interconn=true + +check ovn-sbctl chassis-add hv2 geneve 192.168.0.12 \ + -- set chassis hv2 other_config:is-remote=true + +check ovn-sbctl chassis-add hv3 vxlan 192.168.0.14 \ + -- set chassis hv3 other_config:is-remote=true + +check ovn-nbctl --wait=hv sync + +chassis_cookie() { + ovn-debug uuid-to-cookie $(fetch_column chassis _uuid name=$1) +} + +ovs-ofctl dump-flows --names --no-stats br-int table=OFTABLE_PHY_TO_LOG > phy_to_log_flows +ovs-ofctl dump-flows --names --no-stats br-int table=OFTABLE_FLOOD_REMOTE_CHASSIS > flood_flows + +# Check that we have all encap + output actions one by one because the order can change +# Geneve +AT_CHECK([grep -c 'move:OXM_OF_METADATA\[[0..23\]]->NXM_NX_TUN_ID\[[0..23\]],set_field:0->tun_metadata0,move:NXM_NX_REG14\[[0..14\]]->NXM_NX_TUN_METADATA0\[[16..30\]],output:"ovn-hv2-0"' flood_flows], [0], [dnl +1 +]) + +# VXLAN +AT_CHECK([grep -c 'move:NXM_NX_REG14\[[0..11\]]->NXM_NX_TUN_ID\[[12..23\]],move:OXM_OF_METADATA\[[0..11\]]->NXM_NX_TUN_ID\[[0..11\]],output:"ovn-hv3-0"' flood_flows], [0], [dnl +1 +]) + +AT_CHECK([grep -c "reg6=0x8000" flood_flows], [0], [dnl +1 +]) + +AT_CHECK([grep -c "reg10=0/0x10000" flood_flows], [0], [dnl +1 +]) + +# Check ingress flows for ARP and ND NA +# Geneve +hv2_cookie="$(chassis_cookie hv2)" +AT_CHECK_UNQUOTED([grep "cookie=$hv2_cookie," phy_to_log_flows], [0], [dnl + cookie=$hv2_cookie, priority=120,arp,tun_metadata0=0,in_port="ovn-hv2-0",arp_op=2 actions=load:0x1->NXM_NX_REG10[[16]],move:NXM_NX_TUN_ID[[0..23]]->OXM_OF_METADATA[[0..23]],move:NXM_NX_TUN_METADATA0[[16..30]]->NXM_NX_REG14[[0..14]],move:NXM_NX_TUN_METADATA0[[0..15]]->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE) + cookie=$hv2_cookie, priority=120,icmp6,tun_metadata0=0,in_port="ovn-hv2-0",icmp_type=136,icmp_code=0 
actions=load:0x1->NXM_NX_REG10[[16]],move:NXM_NX_TUN_ID[[0..23]]->OXM_OF_METADATA[[0..23]],move:NXM_NX_TUN_METADATA0[[16..30]]->NXM_NX_REG14[[0..14]],move:NXM_NX_TUN_METADATA0[[0..15]]->NXM_NX_REG15[[0..15]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE) +]) + +# VXLAN +hv3_cookie="$(chassis_cookie hv3)" +AT_CHECK_UNQUOTED([grep "cookie=$hv3_cookie," phy_to_log_flows], [0], [dnl + cookie=$hv3_cookie, priority=120,icmp6,in_port="ovn-hv3-0",icmp_type=136,icmp_code=0 actions=load:0x1->NXM_NX_REG10[[16]],move:NXM_NX_TUN_ID[[12..23]]->NXM_NX_REG15[[0..11]],move:NXM_NX_TUN_ID[[0..11]]->OXM_OF_METADATA[[0..11]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE),resubmit(,OFTABLE_LOCAL_OUTPUT) + cookie=$hv3_cookie, priority=120,arp,in_port="ovn-hv3-0",arp_op=2 actions=load:0x1->NXM_NX_REG10[[16]],move:NXM_NX_TUN_ID[[12..23]]->NXM_NX_REG15[[0..11]],move:NXM_NX_TUN_ID[[0..11]]->OXM_OF_METADATA[[0..11]],resubmit(,OFTABLE_LOG_INGRESS_PIPELINE),resubmit(,OFTABLE_LOCAL_OUTPUT) +]) + +OVN_CLEANUP([hv1]) +AT_CLEANUP diff --git a/tests/ovn-macros.at b/tests/ovn-macros.at index 6791424bc1..6a568e7437 100644 --- a/tests/ovn-macros.at +++ b/tests/ovn-macros.at @@ -1433,5 +1433,6 @@ m4_define([OFTABLE_CT_ZONE_LOOKUP], [80]) m4_define([OFTABLE_CT_ORIG_NW_DST_LOAD], [81]) m4_define([OFTABLE_CT_ORIG_IP6_DST_LOAD], [82]) m4_define([OFTABLE_CT_ORIG_TP_DST_LOAD], [83]) +m4_define([OFTABLE_FLOOD_REMOTE_CHASSIS], [84]) m4_define([OFTABLE_SAVE_INPORT_HEX], [m4_eval(OFTABLE_SAVE_INPORT, 16)]) diff --git a/tests/ovn.at b/tests/ovn.at index da0bb7e16c..3156bbd315 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -2279,6 +2279,12 @@ ct_tp_dst; ct_tp_dst(); Syntax error at `ct_tp_dst' expecting action. +flood_remote; + encodes as set_field:0x8000->reg6,resubmit(,OFTABLE_FLOOD_REMOTE_CHASSIS) + +flood_remote(); + Syntax error at `(' expecting `;'. + # Miscellaneous negative tests. ; Syntax error at `;'. @@ -35760,7 +35766,9 @@ check_default_flows() { # respectively and it's OK if they don't have a default action. 
# Tables 81, 82 and 83 are part of ct_nw_dst(), ct_ip6_dst() and ct_tp_dst() # actions respectively and its OK for them to not have default flows. - if test ${table} -eq 68 -o ${table} -eq 70 -o ${table} -eq 81 -o ${table} -eq 82 -o ${table} -eq 83; then + # Table 84 is part of flood_remote; action and its OK for + # it to not have default flows. + if test ${table} -eq 68 -o ${table} -eq 70 -o ${table} -eq 81 -o ${table} -eq 82 -o ${table} -eq 83 -o ${table} -eq 84; then continue; fi AT_CHECK([grep -qe "table=$table.* priority=0\(,metadata=0x\w*\)\? actions" oflows], [0], [ignore], [ignore], [echo "Table $table does not contain a default action"]) diff --git a/tests/test-ovn.c b/tests/test-ovn.c index c310e197e1..59328dc6cd 100644 --- a/tests/test-ovn.c +++ b/tests/test-ovn.c @@ -1386,6 +1386,7 @@ test_parse_actions(struct ovs_cmdl_context *ctx OVS_UNUSED) .ct_nw_dst_load_table = OFTABLE_CT_ORIG_NW_DST_LOAD, .ct_ip6_dst_load_table = OFTABLE_CT_ORIG_IP6_DST_LOAD, .ct_tp_dst_load_table = OFTABLE_CT_ORIG_TP_DST_LOAD, + .flood_remote_table = OFTABLE_FLOOD_REMOTE_CHASSIS, .lflow_uuid.parts = { 0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd}, .dp_key = 0xabcdef, diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c index bd31cdbf5e..d25c612c78 100644 --- a/utilities/ovn-trace.c +++ b/utilities/ovn-trace.c @@ -3460,6 +3460,9 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len, break; case OVNACT_CT_ORIG_TP_DST: break; + case OVNACT_FLOOD_REMOTE: + ovntrace_node_append(super, OVNTRACE_NODE_OUTPUT, + "/* Flood to all remote chassis */"); } } ofpbuf_uninit(&stack);