From 6728d578f64e124d9905f1d48899226405cb85ae Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 29 Jun 2015 18:01:59 -0700 Subject: [PATCH] dpif-netdev: Translate Geneve options per-flow, not per-packet. The kernel implementation of Geneve options stores the TLV option data in the flow exactly as received, without any further parsing. This is then translated to known options for the purposes of matching on flow setup (which will then install a datapath flow in the form the kernel is expecting). The userspace implementation behaves a little bit differently - it looks up known options as each packet is received. The reason for this is that there is a much tighter coupling between datapath and flow translation and the representation is generally expected to be the same. This works but it incurs work on a per-packet basis that could be done per-flow instead. This introduces a small translation step for Geneve packets between datapath and flow lookup for the userspace datapath in order to allow the same kind of processing that the kernel does. A side effect of this is that unknown options are now shown when flows are dumped via ovs-appctl dpif/dump-flows, similar to the kernel. There is a second benefit to this as well: for some operations it is preferable to keep the options exactly as they were received on the wire, which this enables. One example is that for packets that are executed from ofproto-dpif-upcall to the datapath, this avoids the translation of Geneve metadata. Since this conversion is potentially lossy (for unknown options), keeping everything in the same format removes the possibility of dropping options if the packet comes back up to userspace and the Geneve option translation table has changed. To help with these types of operations, most functions can understand both formats of data and seamlessly do the right thing.
Signed-off-by: Jesse Gross Acked-by: Jarno Rajahalme --- lib/automake.mk | 1 + lib/dpif-netdev.c | 62 +++++- lib/flow.c | 47 ++++- lib/flow.h | 13 +- lib/geneve.h | 63 ++++++ lib/meta-flow.c | 6 +- lib/netdev-vport.c | 26 ++- lib/odp-execute.c | 2 +- lib/odp-util.c | 58 ++++-- lib/odp-util.h | 12 +- lib/packets.h | 41 +--- lib/tun-metadata.c | 352 ++++++++++++++++++++++++---------- lib/tun-metadata.h | 74 ++++--- ofproto/ofproto-dpif-sflow.c | 2 +- ofproto/ofproto-dpif-upcall.c | 2 +- tests/tunnel-push-pop.at | 2 +- 16 files changed, 540 insertions(+), 223 deletions(-) create mode 100644 lib/geneve.h diff --git a/lib/automake.mk b/lib/automake.mk index a7e7b9b6729..15a9373bb07 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -81,6 +81,7 @@ lib_libopenvswitch_la_SOURCES = \ lib/fatal-signal.h \ lib/flow.c \ lib/flow.h \ + lib/geneve.h \ lib/guarded-list.c \ lib/guarded-list.h \ lib/hash.c \ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 0351cdf9001..c14435202bf 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1884,8 +1884,8 @@ dpif_netdev_mask_from_nlattrs(const struct nlattr *key, uint32_t key_len, if (mask_key_len) { enum odp_key_fitness fitness; - fitness = odp_flow_key_to_mask(mask_key, mask_key_len, key, key_len, - &wc->masks, flow); + fitness = odp_flow_key_to_mask_udpif(mask_key, mask_key_len, key, + key_len, &wc->masks, flow); if (fitness) { /* This should not happen: it indicates that * odp_flow_key_from_mask() and odp_flow_key_to_mask() @@ -1919,7 +1919,7 @@ dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len, { odp_port_t in_port; - if (odp_flow_key_to_flow(key, key_len, flow)) { + if (odp_flow_key_to_flow_udpif(key, key_len, flow)) { /* This should not happen: it indicates that odp_flow_key_from_flow() * and odp_flow_key_to_flow() disagree on the acceptable form of a * flow. 
Log the problem as an error, with enough details to enable @@ -3022,11 +3022,27 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_, struct ofpbuf *actions, struct ofpbuf *put_actions) { struct dp_netdev *dp = pmd->dp; + struct flow_tnl orig_tunnel; + int err; if (OVS_UNLIKELY(!dp->upcall_cb)) { return ENODEV; } + /* Upcall processing expects the Geneve options to be in the translated + * format but we need to retain the raw format for datapath use. */ + orig_tunnel.flags = flow->tunnel.flags; + if (flow->tunnel.flags & FLOW_TNL_F_UDPIF) { + orig_tunnel.metadata.present.len = flow->tunnel.metadata.present.len; + memcpy(orig_tunnel.metadata.opts.gnv, flow->tunnel.metadata.opts.gnv, + flow->tunnel.metadata.present.len); + err = tun_metadata_from_geneve_udpif(&orig_tunnel, &orig_tunnel, + &flow->tunnel); + if (err) { + return err; + } + } + if (OVS_UNLIKELY(!VLOG_DROP_DBG(&upcall_rl))) { struct ds ds = DS_EMPTY_INITIALIZER; char *packet_str; @@ -3054,8 +3070,44 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_, ds_destroy(&ds); } - return dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata, - actions, wc, put_actions, dp->upcall_aux); + err = dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata, + actions, wc, put_actions, dp->upcall_aux); + if (err && err != ENOSPC) { + return err; + } + + /* Translate tunnel metadata masks to datapath format. */ + if (wc) { + if (wc->masks.tunnel.metadata.present.map) { + struct geneve_opt opts[GENEVE_TOT_OPT_SIZE / + sizeof(struct geneve_opt)]; + + tun_metadata_to_geneve_udpif_mask(&flow->tunnel, + &wc->masks.tunnel, + orig_tunnel.metadata.opts.gnv, + orig_tunnel.metadata.present.len, + opts); + + memset(&wc->masks.tunnel.metadata, 0, + sizeof wc->masks.tunnel.metadata); + memcpy(&wc->masks.tunnel.metadata.opts.gnv, opts, + orig_tunnel.metadata.present.len); + } + wc->masks.tunnel.metadata.present.len = 0xff; + } + + /* Restore tunnel metadata. 
We need to use the saved options to ensure + * that any unknown options are not lost. The generated mask will have + * the same structure, matching on types and lengths but wildcarding + * option data we don't care about. */ + if (orig_tunnel.flags & FLOW_TNL_F_UDPIF) { + memcpy(&flow->tunnel.metadata.opts.gnv, orig_tunnel.metadata.opts.gnv, + orig_tunnel.metadata.present.len); + flow->tunnel.metadata.present.len = orig_tunnel.metadata.present.len; + flow->tunnel.flags |= FLOW_TNL_F_UDPIF; + } + + return err; } static inline uint32_t diff --git a/lib/flow.c b/lib/flow.c index 352e9b86262..af51aacf783 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -462,9 +462,22 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) miniflow_push_words(mf, tunnel, &md->tunnel, offsetof(struct flow_tnl, metadata) / sizeof(uint64_t)); - if (md->tunnel.metadata.opt_map) { - miniflow_push_words(mf, tunnel.metadata, &md->tunnel.metadata, - sizeof md->tunnel.metadata / sizeof(uint64_t)); + + if (!(md->tunnel.flags & FLOW_TNL_F_UDPIF)) { + if (md->tunnel.metadata.present.map) { + miniflow_push_words(mf, tunnel.metadata, &md->tunnel.metadata, + sizeof md->tunnel.metadata / + sizeof(uint64_t)); + } + } else { + if (md->tunnel.metadata.present.len) { + miniflow_push_words(mf, tunnel.metadata.present, + &md->tunnel.metadata.present, 1); + miniflow_push_words(mf, tunnel.metadata.opts.gnv, + md->tunnel.metadata.opts.gnv, + DIV_ROUND_UP(md->tunnel.metadata.present.len, + sizeof(uint64_t))); + } } } if (md->skb_priority || md->pkt_mark) { @@ -815,7 +828,7 @@ flow_get_metadata(const struct flow *flow, struct match *flow_metadata) if (flow->tunnel.gbp_flags) { match_set_tun_gbp_flags(flow_metadata, flow->tunnel.gbp_flags); } - tun_metadata_get_fmd(&flow->tunnel.metadata, flow_metadata); + tun_metadata_get_fmd(&flow->tunnel, flow_metadata); if (flow->metadata != htonll(0)) { match_set_metadata(flow_metadata, flow->metadata); } @@ -1161,9 +1174,16 @@ void 
flow_wildcards_init_for_packet(struct flow_wildcards *wc, WC_MASK_FIELD(wc, tunnel.gbp_id); WC_MASK_FIELD(wc, tunnel.gbp_flags); - if (flow->tunnel.metadata.opt_map) { - wc->masks.tunnel.metadata.opt_map = flow->tunnel.metadata.opt_map; - WC_MASK_FIELD(wc, tunnel.metadata.opts); + if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) { + if (flow->tunnel.metadata.present.map) { + wc->masks.tunnel.metadata.present.map = + flow->tunnel.metadata.present.map; + WC_MASK_FIELD(wc, tunnel.metadata.opts.u8); + } + } else { + WC_MASK_FIELD(wc, tunnel.metadata.present.len); + memset(wc->masks.tunnel.metadata.opts.gnv, 0xff, + flow->tunnel.metadata.present.len); } } else if (flow->tunnel.tun_id) { WC_MASK_FIELD(wc, tunnel.tun_id); @@ -1253,9 +1273,16 @@ flow_wc_map(const struct flow *flow, struct miniflow *map) map->tnl_map = 0; if (flow->tunnel.ip_dst) { - map->tnl_map = MINIFLOW_TNL_MAP(tunnel); - if (!flow->tunnel.metadata.opt_map) { - map->tnl_map &= ~MINIFLOW_TNL_MAP(tunnel.metadata); + map->tnl_map |= MINIFLOW_TNL_MAP__(tunnel, + offsetof(struct flow_tnl, metadata)); + if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) { + if (flow->tunnel.metadata.present.map) { + map->tnl_map |= MINIFLOW_TNL_MAP(tunnel.metadata); + } + } else { + map->tnl_map |= MINIFLOW_TNL_MAP(tunnel.metadata.present.len); + map->tnl_map |= MINIFLOW_TNL_MAP__(tunnel.metadata.opts.gnv, + flow->tunnel.metadata.present.len); } } diff --git a/lib/flow.h b/lib/flow.h index 96aa4aaab4b..5bc926705c0 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -80,6 +80,12 @@ BUILD_ASSERT_DECL(FLOW_TNL_F_OAM == NX_TUN_FLAG_OAM); #define FLOW_TNL_F_MASK ((1 << 4) - 1) +/* Purely internal to OVS userspace. These flags should never be exposed to + * the outside world and so aren't included in the flags mask. */ + +/* Tunnel information is in userspace datapath format. */ +#define FLOW_TNL_F_UDPIF (1 << 4) + const char *flow_tun_flag_to_string(uint32_t flags); /* Maximum number of supported MPLS labels. 
*/ @@ -518,9 +524,12 @@ flow_values_get_next_in_maps(struct flow_for_each_in_maps_aux *aux, #define FLOW_U64_SIZE(FIELD) \ DIV_ROUND_UP(sizeof(((struct flow *)0)->FIELD), sizeof(uint64_t)) -#define MINIFLOW_TNL_MAP(FIELD) \ - (((UINT64_C(1) << FLOW_U64_SIZE(FIELD)) - 1) \ +#define MINIFLOW_TNL_MAP__(FIELD, LEN) \ + (((UINT64_C(1) << DIV_ROUND_UP(LEN, sizeof(uint64_t))) - 1) \ << (offsetof(struct flow, FIELD) / sizeof(uint64_t))) + +#define MINIFLOW_TNL_MAP(FIELD) \ + MINIFLOW_TNL_MAP__(FIELD, sizeof(((struct flow *)0)->FIELD)) #define MINIFLOW_PKT_MAP(FIELD) \ (((UINT64_C(1) << FLOW_U64_SIZE(FIELD)) - 1) \ << ((offsetof(struct flow, FIELD) / sizeof(uint64_t)) - FLOW_TNL_U64S)) diff --git a/lib/geneve.h b/lib/geneve.h new file mode 100644 index 00000000000..f0256b1da34 --- /dev/null +++ b/lib/geneve.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2015 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef GENEVE_H +#define GENEVE_H 1 + +#include "openvswitch/types.h" + +#define GENEVE_MAX_OPT_SIZE 124 +#define GENEVE_TOT_OPT_SIZE 252 + +#define GENEVE_CRIT_OPT_TYPE (1 << 7) + +struct geneve_opt { + ovs_be16 opt_class; + uint8_t type; +#ifdef WORDS_BIGENDIAN + uint8_t r1:1; + uint8_t r2:1; + uint8_t r3:1; + uint8_t length:5; +#else + uint8_t length:5; + uint8_t r3:1; + uint8_t r2:1; + uint8_t r1:1; +#endif + /* Option data */ +}; + +struct genevehdr { +#ifdef WORDS_BIGENDIAN + uint8_t ver:2; + uint8_t opt_len:6; + uint8_t oam:1; + uint8_t critical:1; + uint8_t rsvd1:6; +#else + uint8_t opt_len:6; + uint8_t ver:2; + uint8_t rsvd1:6; + uint8_t critical:1; + uint8_t oam:1; +#endif + ovs_be16 proto_type; + ovs_16aligned_be32 vni; + struct geneve_opt options[]; +}; + +#endif /* geneve.h */ diff --git a/lib/meta-flow.c b/lib/meta-flow.c index 0c01414c983..4c7cf2c9e4b 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -196,7 +196,7 @@ mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc) CASE_MFF_TUN_METADATA: { union mf_value value; - tun_metadata_read(&wc->masks.tunnel.metadata, mf, &value); + tun_metadata_read(&wc->masks.tunnel, mf, &value); return is_all_zeros(&value.tun_metadata, mf->n_bytes); } case MFF_METADATA: @@ -616,7 +616,7 @@ mf_get_value(const struct mf_field *mf, const struct flow *flow, value->u8 = flow->tunnel.ip_tos; break; CASE_MFF_TUN_METADATA: - tun_metadata_read(&flow->tunnel.metadata, mf, value); + tun_metadata_read(&flow->tunnel, mf, value); break; case MFF_METADATA: @@ -1119,7 +1119,7 @@ mf_set_flow_value(const struct mf_field *mf, flow->tunnel.ip_ttl = value->u8; break; CASE_MFF_TUN_METADATA: - tun_metadata_write(&flow->tunnel.metadata, mf, value); + tun_metadata_write(&flow->tunnel, mf, value); break; case MFF_METADATA: flow->metadata = value->be64; diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index a3394dd6359..a0e53b86655 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -1054,11 +1054,10 
@@ parse_gre_header(struct dp_packet *packet, static void pkt_metadata_init_tnl(struct pkt_metadata *md) { - memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata)); - - /* If 'opt_map' is zero then none of the rest of the tunnel metadata - * will be read, so we can skip clearing it. */ - md->tunnel.metadata.opt_map = 0; + /* Zero up through the tunnel metadata options. The length and table + * are before this and as long as they are empty, the options won't + * be looked at. */ + memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata.opts)); } static int @@ -1208,8 +1207,7 @@ netdev_geneve_pop_header(struct dp_packet *packet) struct pkt_metadata *md = &packet->md; struct flow_tnl *tnl = &md->tunnel; struct genevehdr *gnh; - unsigned int hlen; - int err; + unsigned int hlen, opts_len; pkt_metadata_init_tnl(md); if (GENEVE_BASE_HLEN > dp_packet_size(packet)) { @@ -1223,7 +1221,8 @@ netdev_geneve_pop_header(struct dp_packet *packet) return EINVAL; } - hlen = GENEVE_BASE_HLEN + gnh->opt_len * 4; + opts_len = gnh->opt_len * 4; + hlen = GENEVE_BASE_HLEN + opts_len; if (hlen > dp_packet_size(packet)) { VLOG_WARN_RL(&err_rl, "geneve packet too small: header len=%u packet size=%u\n", hlen, dp_packet_size(packet)); @@ -1245,12 +1244,9 @@ netdev_geneve_pop_header(struct dp_packet *packet) tnl->tun_id = htonll(ntohl(get_16aligned_be32(&gnh->vni)) >> 8); tnl->flags |= FLOW_TNL_F_KEY; - err = tun_metadata_from_geneve_header(gnh->options, gnh->opt_len * 4, - &tnl->metadata); - if (err) { - VLOG_WARN_RL(&err_rl, "invalid geneve options"); - return err; - } + memcpy(tnl->metadata.opts.gnv, gnh->options, opts_len); + tnl->metadata.present.len = opts_len; + tnl->flags |= FLOW_TNL_F_UDPIF; dp_packet_reset_packet(packet, hlen); @@ -1278,7 +1274,7 @@ netdev_geneve_build_header(const struct netdev *netdev, ovs_mutex_unlock(&dev->mutex); - opt_len = tun_metadata_to_geneve_header(&tnl_flow->tunnel.metadata, + opt_len = tun_metadata_to_geneve_header(&tnl_flow->tunnel, 
gnh->options, &crit_opt); gnh->opt_len = opt_len / 4; diff --git a/lib/odp-execute.c b/lib/odp-execute.c index c6764510058..c4806e1f483 100644 --- a/lib/odp-execute.c +++ b/lib/odp-execute.c @@ -151,7 +151,7 @@ odp_set_tunnel_action(const struct nlattr *a, struct flow_tnl *tun_key) { enum odp_key_fitness fitness; - fitness = odp_tun_key_from_attr(a, tun_key); + fitness = odp_tun_key_from_attr(a, true, tun_key); ovs_assert(fitness != ODP_FIT_ERROR); } diff --git a/lib/odp-util.c b/lib/odp-util.c index eec0bfb7e68..f142f037d6f 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -1264,7 +1264,8 @@ ovs_frag_type_to_string(enum ovs_frag_type type) static enum odp_key_fitness odp_tun_key_from_attr__(const struct nlattr *attr, const struct nlattr *flow_attrs, size_t flow_attr_len, - const struct flow_tnl *src_tun, struct flow_tnl *tun) + const struct flow_tnl *src_tun, struct flow_tnl *tun, + bool udpif) { unsigned int left; const struct nlattr *a; @@ -1335,8 +1336,7 @@ odp_tun_key_from_attr__(const struct nlattr *attr, } case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: if (tun_metadata_from_geneve_nlattr(a, flow_attrs, flow_attr_len, - &src_tun->metadata, - &tun->metadata)) { + src_tun, udpif, tun)) { return ODP_FIT_ERROR; } break; @@ -1359,10 +1359,11 @@ odp_tun_key_from_attr__(const struct nlattr *attr, } enum odp_key_fitness -odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun) +odp_tun_key_from_attr(const struct nlattr *attr, bool udpif, + struct flow_tnl *tun) { memset(tun, 0, sizeof *tun); - return odp_tun_key_from_attr__(attr, NULL, 0, NULL, tun); + return odp_tun_key_from_attr__(attr, NULL, 0, NULL, tun, udpif); } static void @@ -1411,13 +1412,7 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key, (tun_key->gbp_flags << 16) | ntohs(tun_key->gbp_id)); nl_msg_end_nested(a, vxlan_opts_ofs); } - - if (tun_key == tun_flow_key) { - tun_metadata_to_geneve_nlattr_flow(&tun_key->metadata, a); - } else { - tun_metadata_to_geneve_nlattr_mask(key_buf, 
&tun_key->metadata, - &tun_flow_key->metadata, a); - } + tun_metadata_to_geneve_nlattr(tun_key, tun_flow_key, key_buf, a); nl_msg_end_nested(a, tun_key_ofs); } @@ -3597,7 +3592,7 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len, case OVS_KEY_ATTR_TUNNEL: { enum odp_key_fitness res; - res = odp_tun_key_from_attr(nla, &md->tunnel); + res = odp_tun_key_from_attr(nla, true, &md->tunnel); if (res == ODP_FIT_ERROR) { memset(&md->tunnel, 0, sizeof md->tunnel); } else if (res == ODP_FIT_PERFECT) { @@ -4107,7 +4102,8 @@ parse_8021q_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], static enum odp_key_fitness odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, const struct nlattr *src_key, size_t src_key_len, - struct flow *flow, const struct flow *src_flow) + struct flow *flow, const struct flow *src_flow, + bool udpif) { const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1]; uint64_t expected_attrs; @@ -4150,9 +4146,10 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUNNEL)) { enum odp_key_fitness res; - res = odp_tun_key_from_attr__(attrs[OVS_KEY_ATTR_TUNNEL], src_key, + res = odp_tun_key_from_attr__(attrs[OVS_KEY_ATTR_TUNNEL], + is_mask ? 
src_key : NULL, src_key_len, &src_flow->tunnel, - &flow->tunnel); + &flow->tunnel, udpif); if (res == ODP_FIT_ERROR) { return ODP_FIT_ERROR; } else if (res == ODP_FIT_PERFECT) { @@ -4224,7 +4221,7 @@ enum odp_key_fitness odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, struct flow *flow) { - return odp_flow_key_to_flow__(key, key_len, NULL, 0, flow, flow); + return odp_flow_key_to_flow__(key, key_len, NULL, 0, flow, flow, false); } /* Converts the 'mask_key_len' bytes of OVS_KEY_ATTR_* attributes in 'mask_key' @@ -4238,7 +4235,32 @@ odp_flow_key_to_mask(const struct nlattr *mask_key, size_t mask_key_len, struct flow *mask, const struct flow *flow) { return odp_flow_key_to_flow__(mask_key, mask_key_len, flow_key, flow_key_len, - mask, flow); + mask, flow, false); +} + +/* These functions are similar to their non-"_udpif" variants but output a + * 'flow' that is suitable for fast-path packet processing. + * + * Some fields have different representation for flow setup and per- + * packet processing (i.e. different between ofproto-dpif and userspace + * datapath). In particular, with the non-"_udpif" functions, struct + * tun_metadata is in the per-flow format (using 'present.map' and 'opts.u8'); + * with these functions, struct tun_metadata is in the per-packet format + * (using 'present.len' and 'opts.gnv'). */ +enum odp_key_fitness +odp_flow_key_to_flow_udpif(const struct nlattr *key, size_t key_len, + struct flow *flow) +{ + return odp_flow_key_to_flow__(key, key_len, NULL, 0, flow, flow, true); +} + +enum odp_key_fitness +odp_flow_key_to_mask_udpif(const struct nlattr *mask_key, size_t mask_key_len, + const struct nlattr *flow_key, size_t flow_key_len, + struct flow *mask, const struct flow *flow) +{ + return odp_flow_key_to_flow__(mask_key, mask_key_len, flow_key, flow_key_len, + mask, flow, true); } /* Returns 'fitness' as a string, for use in debug messages. 
*/ diff --git a/lib/odp-util.h b/lib/odp-util.h index 1eaa06b042f..bc2779413c7 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -144,7 +144,7 @@ struct odputil_keybuf { uint32_t keybuf[DIV_ROUND_UP(ODPUTIL_FLOW_KEY_BYTES, 4)]; }; -enum odp_key_fitness odp_tun_key_from_attr(const struct nlattr *, +enum odp_key_fitness odp_tun_key_from_attr(const struct nlattr *, bool udpif, struct flow_tnl *); int odp_ufid_from_string(const char *s_, ovs_u128 *ufid); @@ -225,6 +225,16 @@ enum odp_key_fitness odp_flow_key_to_mask(const struct nlattr *mask_key, size_t flow_key_len, struct flow *mask, const struct flow *flow); + +enum odp_key_fitness odp_flow_key_to_flow_udpif(const struct nlattr *, size_t, + struct flow *); +enum odp_key_fitness odp_flow_key_to_mask_udpif(const struct nlattr *mask_key, + size_t mask_key_len, + const struct nlattr *flow_key, + size_t flow_key_len, + struct flow *mask, + const struct flow *flow); + const char *odp_key_fitness_to_string(enum odp_key_fitness); void commit_odp_tunnel_action(const struct flow *, struct flow *base, diff --git a/lib/packets.h b/lib/packets.h index c709af52258..38af37b721c 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -23,6 +23,7 @@ #include #include #include "compiler.h" +#include "geneve.h" #include "openvswitch/types.h" #include "random.h" #include "hash.h" @@ -802,46 +803,6 @@ static inline bool dl_type_is_ip_any(ovs_be16 dl_type) } /* Tunnel header */ -#define GENEVE_MAX_OPT_SIZE 124 -#define GENEVE_TOT_OPT_SIZE 252 - -#define GENEVE_CRIT_OPT_TYPE (1 << 7) - -struct geneve_opt { - ovs_be16 opt_class; - uint8_t type; -#ifdef WORDS_BIGENDIAN - uint8_t r1:1; - uint8_t r2:1; - uint8_t r3:1; - uint8_t length:5; -#else - uint8_t length:5; - uint8_t r3:1; - uint8_t r2:1; - uint8_t r1:1; -#endif - /* Option data */ -}; - -struct genevehdr { -#ifdef WORDS_BIGENDIAN - uint8_t ver:2; - uint8_t opt_len:6; - uint8_t oam:1; - uint8_t critical:1; - uint8_t rsvd1:6; -#else - uint8_t opt_len:6; - uint8_t ver:2; - uint8_t rsvd1:6; - 
uint8_t critical:1; - uint8_t oam:1; -#endif - ovs_be16 proto_type; - ovs_16aligned_be32 vni; - struct geneve_opt options[]; -}; /* GRE protocol header */ struct gre_base_hdr { diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c index 7d82fb76246..216d5e4850f 100644 --- a/lib/tun-metadata.c +++ b/lib/tun-metadata.c @@ -226,7 +226,7 @@ tun_metadata_table_request(struct ofputil_geneve_table_reply *gtr) } } -/* Copies the value of field 'mf' from 'metadata' into 'value'. +/* Copies the value of field 'mf' from 'tnl' (which must be in non-UDPIF format) * into 'value'. * * 'mf' must be an MFF_TUN_METADATA* field. * @@ -234,7 +234,7 @@ tun_metadata_table_request(struct ofputil_geneve_table_reply *gtr) * tun_metadata_init(). If no such table has been created or if 'mf' hasn't * been allocated in it yet, this just zeros 'value'. */ void -tun_metadata_read(const struct tun_metadata *metadata, +tun_metadata_read(const struct flow_tnl *tnl, const struct mf_field *mf, union mf_value *value) { struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab); @@ -250,10 +250,10 @@ tun_metadata_read(const struct tun_metadata *metadata, memset(value->tun_metadata, 0, mf->n_bytes - loc->len); memcpy_from_metadata(value->tun_metadata + mf->n_bytes - loc->len, - metadata, loc); + &tnl->metadata, loc); } -/* Copies 'value' into field 'mf' in 'metadata'. +/* Copies 'value' into field 'mf' in 'tnl' (in non-UDPIF format). * * 'mf' must be an MFF_TUN_METADATA* field. * @@ -261,7 +261,7 @@ tun_metadata_read(const struct tun_metadata *metadata, * tun_metadata_init(). If no such table has been created or if 'mf' hasn't * been allocated in it yet, this function does nothing. 
*/ void -tun_metadata_write(struct tun_metadata *metadata, +tun_metadata_write(struct flow_tnl *tnl, const struct mf_field *mf, const union mf_value *value) { struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab); @@ -274,9 +274,9 @@ tun_metadata_write(struct tun_metadata *metadata, loc = &map->entries[idx].loc; - ULLONG_SET1(metadata->opt_map, idx); - memcpy_to_metadata(metadata, value->tun_metadata + mf->n_bytes - loc->len, - loc); + ULLONG_SET1(tnl->metadata.present.map, idx); + memcpy_to_metadata(&tnl->metadata, + value->tun_metadata + mf->n_bytes - loc->len, loc); } static const struct tun_metadata_loc * @@ -310,7 +310,7 @@ metadata_loc_from_match(struct tun_table *map, struct match *match, /* Makes 'match' match 'value'/'mask' on field 'mf'. * - * 'mf' must be an MFF_TUN_METADATA* field. + * 'mf' must be an MFF_TUN_METADATA* field. 'match' must be in non-UDPIF format. * * If there is global tunnel metadata matching table, this function is * effective only if there is already a mapping for 'mf'. 
Otherwise, the @@ -334,6 +334,8 @@ tun_metadata_set_match(const struct mf_field *mf, const union mf_value *value, unsigned int data_offset; union mf_value data; + ovs_assert(!(match->flow.tunnel.flags & FLOW_TNL_F_UDPIF)); + field_len = mf_field_len(mf, value, mask); loc = metadata_loc_from_match(map, match, idx, field_len); if (!loc) { @@ -353,7 +355,7 @@ tun_metadata_set_match(const struct mf_field *mf, const union mf_value *value, mask->tun_metadata[data_offset + i]; } } - ULLONG_SET1(match->flow.tunnel.metadata.opt_map, idx); + ULLONG_SET1(match->flow.tunnel.metadata.present.map, idx); memcpy_to_metadata(&match->flow.tunnel.metadata, data.tun_metadata, loc); if (!value) { @@ -363,31 +365,67 @@ tun_metadata_set_match(const struct mf_field *mf, const union mf_value *value, } else { memcpy(data.tun_metadata, mask->tun_metadata + data_offset, loc->len); } - ULLONG_SET1(match->wc.masks.tunnel.metadata.opt_map, idx); + ULLONG_SET1(match->wc.masks.tunnel.metadata.present.map, idx); memcpy_to_metadata(&match->wc.masks.tunnel.metadata, data.tun_metadata, loc); } -/* Copies all MFF_TUN_METADATA* fields from 'metadata' to 'flow_metadata'. */ +static bool +udpif_to_parsed(const struct flow_tnl *flow, const struct flow_tnl *mask, + struct flow_tnl *flow_xlate, struct flow_tnl *mask_xlate) +{ + if (flow->flags & FLOW_TNL_F_UDPIF) { + int err; + + err = tun_metadata_from_geneve_udpif(flow, flow, flow_xlate); + if (err) { + return false; + } + + if (mask) { + err = tun_metadata_from_geneve_udpif(flow, mask, mask_xlate); + if (err) { + return false; + } + } + } else { + if (flow->metadata.present.map == 0) { + /* There is no tunnel metadata, don't bother copying. 
+ */ + return false; + } + + memcpy(flow_xlate, flow, sizeof *flow_xlate); + if (mask) { + memcpy(mask_xlate, mask, sizeof *mask_xlate); + } + + if (!flow_xlate->metadata.tab) { + flow_xlate->metadata.tab = ovsrcu_get(struct tun_table *, + &metadata_tab); + } + } + + return true; +} + +/* Copies all MFF_TUN_METADATA* fields from 'tnl' to 'flow_metadata'. */ void -tun_metadata_get_fmd(const struct tun_metadata *metadata, - struct match *flow_metadata) +tun_metadata_get_fmd(const struct flow_tnl *tnl, struct match *flow_metadata) { - struct tun_table *map; + struct flow_tnl flow; int i; - map = metadata->tab; - if (!map) { - map = ovsrcu_get(struct tun_table *, &metadata_tab); + if (!udpif_to_parsed(tnl, NULL, &flow, NULL)) { + return; } - ULLONG_FOR_EACH_1 (i, metadata->opt_map) { + ULLONG_FOR_EACH_1 (i, flow.metadata.present.map) { union mf_value opts; - const struct tun_metadata_loc *old_loc = &map->entries[i].loc; + const struct tun_metadata_loc *old_loc = &flow.metadata.tab->entries[i].loc; const struct tun_metadata_loc *new_loc; new_loc = metadata_loc_from_match(NULL, flow_metadata, i, old_loc->len); - memcpy_from_metadata(opts.tun_metadata, metadata, old_loc); + memcpy_from_metadata(opts.tun_metadata, &flow.metadata, old_loc); memcpy_to_metadata(&flow_metadata->flow.tunnel.metadata, opts.tun_metadata, new_loc); @@ -424,7 +462,7 @@ memcpy_to_metadata(struct tun_metadata *dst, const void *src, int addr = 0; while (chain) { - memcpy(dst->opts + loc->c.offset + addr, (uint8_t *)src + addr, + memcpy(dst->opts.u8 + loc->c.offset + addr, (uint8_t *)src + addr, chain->len); addr += chain->len; chain = chain->next; @@ -439,7 +477,7 @@ memcpy_from_metadata(void *dst, const struct tun_metadata *src, int addr = 0; while (chain) { - memcpy((uint8_t *)dst + addr, src->opts + loc->c.offset + addr, + memcpy((uint8_t *)dst + addr, src->opts.u8 + loc->c.offset + addr, chain->len); addr += chain->len; chain = chain->next; @@ -579,10 +617,21 @@ tun_metadata_del_entry(struct 
tun_table *map, uint8_t idx) } static int -tun_metadata_from_geneve__(struct tun_table *map, const struct geneve_opt *opt, +tun_metadata_from_geneve__(const struct tun_metadata *flow_metadata, + const struct geneve_opt *opt, const struct geneve_opt *flow_opt, int opts_len, struct tun_metadata *metadata) { + struct tun_table *map; + bool is_mask = flow_opt != opt; + + if (!is_mask) { + map = ovsrcu_get(struct tun_table *, &metadata_tab); + metadata->tab = map; + } else { + map = flow_metadata->tab; + } + if (!map) { return 0; } @@ -606,7 +655,7 @@ tun_metadata_from_geneve__(struct tun_table *map, const struct geneve_opt *opt, if (entry) { if (entry->loc.len == flow_opt->length * 4) { memcpy_to_metadata(metadata, opt + 1, &entry->loc); - ULLONG_SET1(metadata->opt_map, entry - map->entries); + ULLONG_SET1(metadata->present.map, entry - map->entries); } else { return EINVAL; } @@ -622,59 +671,97 @@ tun_metadata_from_geneve__(struct tun_table *map, const struct geneve_opt *opt, return 0; } +static const struct nlattr * +tun_metadata_find_geneve_key(const struct nlattr *key, uint32_t key_len) +{ + const struct nlattr *tnl_key; + + tnl_key = nl_attr_find__(key, key_len, OVS_KEY_ATTR_TUNNEL); + if (!tnl_key) { + return NULL; + } + + return nl_attr_find_nested(tnl_key, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS); +} + +/* Converts from Geneve netlink attributes in 'attr' to tunnel metadata + * in 'tun'. The result may either be in UDPIF format or not, as determined + * by 'udpif'. + * + * In the event that a mask is being converted, it is also necessary to + * pass in flow information. This includes the full set of netlink attributes + * (i.e. not just the Geneve attribute) in 'flow_attrs'/'flow_attr_len' and + * the previously converted tunnel metadata 'flow_tun'. + * + * If a flow rather than mask is being converted, 'flow_attrs' must be NULL. 
*/ int tun_metadata_from_geneve_nlattr(const struct nlattr *attr, const struct nlattr *flow_attrs, size_t flow_attr_len, - const struct tun_metadata *flow_metadata, - struct tun_metadata *metadata) + const struct flow_tnl *flow_tun, bool udpif, + struct flow_tnl *tun) { - struct tun_table *map; bool is_mask = !!flow_attrs; + int attr_len = nl_attr_get_size(attr); const struct nlattr *flow; - if (is_mask) { - const struct nlattr *tnl_key; - int mask_len = nl_attr_get_size(attr); + /* No need for real translation, just copy things over. */ + if (udpif) { + memcpy(tun->metadata.opts.gnv, nl_attr_get(attr), attr_len); - tnl_key = nl_attr_find__(flow_attrs, flow_attr_len, OVS_KEY_ATTR_TUNNEL); - if (!tnl_key) { - return mask_len ? EINVAL : 0; + if (!is_mask) { + tun->metadata.present.len = attr_len; + tun->flags |= FLOW_TNL_F_UDPIF; + } else { + /* We need to exact match on the length so we don't + * accidentally match on sets of options that are the same + * at the beginning but with additional options after. */ + tun->metadata.present.len = 0xff; } - flow = nl_attr_find_nested(tnl_key, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS); + return 0; + } + + if (is_mask) { + flow = tun_metadata_find_geneve_key(flow_attrs, flow_attr_len); if (!flow) { - return mask_len ? EINVAL : 0; + return attr_len ? 
EINVAL : 0; } - if (mask_len != nl_attr_get_size(flow)) { + if (attr_len != nl_attr_get_size(flow)) { return EINVAL; } } else { flow = attr; } - if (!is_mask) { - map = ovsrcu_get(struct tun_table *, &metadata_tab); - metadata->tab = map; - } else { - map = flow_metadata->tab; - } - - return tun_metadata_from_geneve__(map, nl_attr_get(attr), nl_attr_get(flow), - nl_attr_get_size(flow), metadata); + return tun_metadata_from_geneve__(&flow_tun->metadata, nl_attr_get(attr), + nl_attr_get(flow), nl_attr_get_size(flow), + &tun->metadata); } +/* Converts from the flat Geneve options representation extracted directly + * from the tunnel header to the representation that maps options to + * pre-allocated locations. The original version (in UDPIF form) is passed + * in 'src' and the translated form is stored in 'dst'. To handle masks, the + * flow must also be passed in through 'flow' (in the original, raw form). */ int -tun_metadata_from_geneve_header(const struct geneve_opt *opts, int opt_len, - struct tun_metadata *metadata) +tun_metadata_from_geneve_udpif(const struct flow_tnl *flow, + const struct flow_tnl *src, + struct flow_tnl *dst) { - struct tun_table *map; - - map = ovsrcu_get(struct tun_table *, &metadata_tab); - metadata->tab = map; + ovs_assert(flow->flags & FLOW_TNL_F_UDPIF); - return tun_metadata_from_geneve__(map, opts, opts, opt_len, metadata); + if (flow == src) { + dst->flags = flow->flags & ~FLOW_TNL_F_UDPIF; + } else { + dst->metadata.tab = NULL; + } + dst->metadata.present.map = 0; + return tun_metadata_from_geneve__(&flow->metadata, src->metadata.opts.gnv, + flow->metadata.opts.gnv, + flow->metadata.present.len, + &dst->metadata); } static void @@ -691,7 +778,7 @@ tun_metadata_to_geneve__(const struct tun_metadata *flow, struct ofpbuf *b, *crit_opt = false; - ULLONG_FOR_EACH_1 (i, flow->opt_map) { + ULLONG_FOR_EACH_1 (i, flow->present.map) { struct tun_meta_entry *entry = &map->entries[i]; struct geneve_opt *opt; @@ -709,14 +796,14 @@ 
tun_metadata_to_geneve__(const struct tun_metadata *flow, struct ofpbuf *b, } } -void -tun_metadata_to_geneve_nlattr_flow(const struct tun_metadata *flow, +static void +tun_metadata_to_geneve_nlattr_flow(const struct flow_tnl *flow, struct ofpbuf *b) { size_t nlattr_offset; bool crit_opt; - if (!flow->opt_map) { + if (!flow->metadata.present.map) { return; } @@ -725,58 +812,43 @@ tun_metadata_to_geneve_nlattr_flow(const struct tun_metadata *flow, * similar enough that we can use the same mechanism. */ nlattr_offset = nl_msg_start_nested(b, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS); - tun_metadata_to_geneve__(flow, b, &crit_opt); + tun_metadata_to_geneve__(&flow->metadata, b, &crit_opt); nl_msg_end_nested(b, nlattr_offset); } +/* Converts from processed tunnel metadata information (in non-udpif + * format) in 'flow' to a stream of Geneve options suitable for + * transmission in 'opts'. Additionally returns whether there were + * any critical options in 'crit_opt' as well as the total length of + * data. 
*/ int -tun_metadata_to_geneve_header(const struct tun_metadata *flow, +tun_metadata_to_geneve_header(const struct flow_tnl *flow, struct geneve_opt *opts, bool *crit_opt) { struct ofpbuf b; + ovs_assert(!(flow->flags & FLOW_TNL_F_UDPIF)); + ofpbuf_use_stack(&b, opts, GENEVE_TOT_OPT_SIZE); - tun_metadata_to_geneve__(flow, &b, crit_opt); + tun_metadata_to_geneve__(&flow->metadata, &b, crit_opt); return b.size; } -void -tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key, - const struct tun_metadata *mask, - const struct tun_metadata *flow, - struct ofpbuf *b) +static void +tun_metadata_to_geneve_mask__(const struct tun_metadata *flow, + const struct tun_metadata *mask, + struct geneve_opt *opt, int opts_len) { struct tun_table *map = flow->tab; - const struct nlattr *tnl_key, *geneve_key; - struct nlattr *geneve_mask; - struct geneve_opt *opt; - int opts_len; if (!map) { return; } - tnl_key = nl_attr_find(key, 0, OVS_KEY_ATTR_TUNNEL); - if (!tnl_key) { - return; - } - - geneve_key = nl_attr_find_nested(tnl_key, - OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS); - if (!geneve_key) { - return; - } - - geneve_mask = ofpbuf_tail(b); - nl_msg_put(b, geneve_key, geneve_key->nla_len); - /* All of these options have already been validated, so no need * for sanity checking. 
*/ - opt = CONST_CAST(struct geneve_opt *, nl_attr_get(geneve_mask)); - opts_len = nl_attr_get_size(geneve_mask); - while (opts_len > 0) { struct tun_meta_entry *entry; int len = sizeof(*opt) + opt->length * 4; @@ -801,6 +873,80 @@ tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key, } } +static void +tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key, + const struct flow_tnl *mask, + const struct flow_tnl *flow, + struct ofpbuf *b) +{ + const struct nlattr *geneve_key; + struct nlattr *geneve_mask; + struct geneve_opt *opt; + int opts_len; + + if (!key) { + return; + } + + geneve_key = tun_metadata_find_geneve_key(key->data, key->size); + if (!geneve_key) { + return; + } + + geneve_mask = ofpbuf_tail(b); + nl_msg_put(b, geneve_key, geneve_key->nla_len); + + opt = CONST_CAST(struct geneve_opt *, nl_attr_get(geneve_mask)); + opts_len = nl_attr_get_size(geneve_mask); + + tun_metadata_to_geneve_mask__(&flow->metadata, &mask->metadata, + opt, opts_len); +} + +/* Convert from the tunnel metadata in 'tun' to netlink attributes stored + * in 'b'. Either UDPIF or non-UDPIF input forms are accepted. + * + * To assist with parsing, it is necessary to also pass in the tunnel metadata + * from the flow in 'flow' as well as the original netlink form of the flow in + * 'key'. */ +void +tun_metadata_to_geneve_nlattr(const struct flow_tnl *tun, + const struct flow_tnl *flow, + const struct ofpbuf *key, + struct ofpbuf *b) +{ + bool is_mask = tun != flow; + + if (!(flow->flags & FLOW_TNL_F_UDPIF)) { + if (!is_mask) { + tun_metadata_to_geneve_nlattr_flow(tun, b); + } else { + tun_metadata_to_geneve_nlattr_mask(key, tun, flow, b); + } + } else if (flow->metadata.present.len || is_mask) { + nl_msg_put_unspec(b, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, + tun->metadata.opts.gnv, + flow->metadata.present.len); + } +} + +/* Converts 'mask_src' (in non-UDPIF format) to a series of masked options in + * 'dst'.
'flow_src' (also in non-UDPIF format) and the original set of + * options 'flow_src_opt'/'opts_len' are needed as a guide to interpret the + * mask data. */ +void +tun_metadata_to_geneve_udpif_mask(const struct flow_tnl *flow_src, + const struct flow_tnl *mask_src, + const struct geneve_opt *flow_src_opt, + int opts_len, struct geneve_opt *dst) +{ + ovs_assert(!(flow_src->flags & FLOW_TNL_F_UDPIF)); + + memcpy(dst, flow_src_opt, opts_len); + tun_metadata_to_geneve_mask__(&flow_src->metadata, + &mask_src->metadata, dst, opts_len); +} + static const struct tun_metadata_loc * metadata_loc_from_match_read(struct tun_table *map, const struct match *match, unsigned int idx) @@ -816,19 +962,22 @@ void tun_metadata_to_nx_match(struct ofpbuf *b, enum ofp_version oxm, const struct match *match) { - struct tun_table *map = ovsrcu_get(struct tun_table *, &metadata_tab); - const struct tun_metadata *metadata = &match->flow.tunnel.metadata; - const struct tun_metadata *mask = &match->wc.masks.tunnel.metadata; + struct flow_tnl flow, mask; int i; - ULLONG_FOR_EACH_1 (i, mask->opt_map) { + if (!udpif_to_parsed(&match->flow.tunnel, &match->wc.masks.tunnel, + &flow, &mask)) { + return; + } + + ULLONG_FOR_EACH_1 (i, mask.metadata.present.map) { const struct tun_metadata_loc *loc; union mf_value opts; union mf_value mask_opts; - loc = metadata_loc_from_match_read(map, match, i); - memcpy_from_metadata(opts.tun_metadata, metadata, loc); - memcpy_from_metadata(mask_opts.tun_metadata, mask, loc); + loc = metadata_loc_from_match_read(flow.metadata.tab, match, i); + memcpy_from_metadata(opts.tun_metadata, &flow.metadata, loc); + memcpy_from_metadata(mask_opts.tun_metadata, &mask.metadata, loc); nxm_put(b, MFF_TUN_METADATA0 + i, oxm, opts.tun_metadata, mask_opts.tun_metadata, loc->len); } @@ -837,22 +986,25 @@ tun_metadata_to_nx_match(struct ofpbuf *b, enum ofp_version oxm, void tun_metadata_match_format(struct ds *s, const struct match *match) { - struct tun_table *map = ovsrcu_get(struct 
tun_table *, &metadata_tab); - const struct tun_metadata *metadata = &match->flow.tunnel.metadata; - const struct tun_metadata *mask = &match->wc.masks.tunnel.metadata; + struct flow_tnl flow, mask; unsigned int i; - ULLONG_FOR_EACH_1 (i, mask->opt_map) { + if (!udpif_to_parsed(&match->flow.tunnel, &match->wc.masks.tunnel, + &flow, &mask)) { + return; + } + + ULLONG_FOR_EACH_1 (i, mask.metadata.present.map) { const struct tun_metadata_loc *loc; union mf_value opts; - loc = metadata_loc_from_match_read(map, match, i); + loc = metadata_loc_from_match_read(flow.metadata.tab, match, i); ds_put_format(s, "tun_metadata%u=", i); - memcpy_from_metadata(opts.tun_metadata, metadata, loc); + memcpy_from_metadata(opts.tun_metadata, &flow.metadata, loc); ds_put_hex(s, opts.tun_metadata, loc->len); - memcpy_from_metadata(opts.tun_metadata, mask, loc); + memcpy_from_metadata(opts.tun_metadata, &mask.metadata, loc); if (!is_all_ones(opts.tun_metadata, loc->len)) { ds_put_char(s, '/'); ds_put_hex(s, opts.tun_metadata, loc->len); diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h index 56bdf2a52c5..49db511830e 100644 --- a/lib/tun-metadata.h +++ b/lib/tun-metadata.h @@ -20,35 +20,56 @@ #include <stdint.h> #include "dynamic-string.h" +#include "geneve.h" #include "netlink.h" #include "ofpbuf.h" #include "openflow/openflow.h" +struct flow_tnl; struct match; struct mf_field; union mf_value; struct ofputil_geneve_table_mod; struct ofputil_geneve_table_reply; struct tun_table; -struct geneve_opt; #define TUN_METADATA_NUM_OPTS 64 #define TUN_METADATA_TOT_OPT_SIZE 256 /* Tunnel option data, plus metadata to aid in their interpretation. * - * 'opt_map' is indexed by type, that is, by the <i> in TUN_METADATA<i>, so - * that e.g. TUN_METADATA5 is present if 'opt_map & (1ULL << 5)' is nonzero. - * The actual data for TUN_METADATA5, if present, might be anywhere in 'opts' - * (not necessarily even contiguous), and finding it requires referring to - * 'tab'.
*/ + * The option data exists in two forms and is interpreted differently depending + * on whether FLOW_TNL_F_UDPIF is set in struct flow_tnl flags: + * + * When FLOW_TNL_F_UDPIF is set, the tunnel metadata is in "userspace datapath + * format". This is typically used for fast-path packet processing to avoid + * the cost of translating options and in situations where we need to maintain + * tunnel metadata exactly as it came in. In this case 'opts.gnv' is raw + * packet data from the tunnel header and 'present.len' indicates the length + * of the data stored there. In these situations, 'tab' is NULL. + * + * In all other cases, we are doing flow-based processing (such as during + * upcalls). FLOW_TNL_F_UDPIF is not set and options are reordered into + * pre-allocated locations. 'present.map' is indexed by type, that is, by the + * <i> in TUN_METADATA<i>, so that e.g. TUN_METADATA5 is present if + * 'present.map & (1ULL << 5)' is nonzero. The actual data for TUN_METADATA5, + * if present, might be anywhere in 'opts.u8' (not necessarily even contiguous), + * and finding it requires referring to 'tab', if set, or the global metadata + * table. */ struct tun_metadata { - uint64_t opt_map; /* 1-bit for each present TLV. */ - uint8_t opts[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs. */ + union { /* Valid members of 'opts'. When 'opts' is sorted into known types, + * 'map' is used. When 'opts' is raw packet data, 'len' is used. */ + uint64_t map; /* 1-bit for each present TLV. */ + uint8_t len; /* Length of data in 'opts'. */ + } present; struct tun_table *tab; /* Types & lengths for 'opts' and 'opt_map'. */ uint8_t pad[sizeof(uint64_t) - sizeof(struct tun_table *)]; /* Make 8 bytes */ + union { + uint8_t u8[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs.
*/ + struct geneve_opt gnv[GENEVE_TOT_OPT_SIZE / sizeof(struct geneve_opt)]; + } opts; }; -BUILD_ASSERT_DECL(sizeof(((struct tun_metadata *)0)->opt_map) * 8 >= +BUILD_ASSERT_DECL(sizeof(((struct tun_metadata *)0)->present.map) * 8 >= TUN_METADATA_NUM_OPTS); /* The location of an option can be stored either as a single offset/len @@ -81,31 +102,34 @@ void tun_metadata_init(void); enum ofperr tun_metadata_table_mod(struct ofputil_geneve_table_mod *); void tun_metadata_table_request(struct ofputil_geneve_table_reply *); -void tun_metadata_read(const struct tun_metadata *, +void tun_metadata_read(const struct flow_tnl *, const struct mf_field *, union mf_value *); -void tun_metadata_write(struct tun_metadata *, +void tun_metadata_write(struct flow_tnl *, const struct mf_field *, const union mf_value *); void tun_metadata_set_match(const struct mf_field *, const union mf_value *value, const union mf_value *mask, struct match *); -void tun_metadata_get_fmd(const struct tun_metadata *, - struct match *flow_metadata); +void tun_metadata_get_fmd(const struct flow_tnl *, struct match *flow_metadata); int tun_metadata_from_geneve_nlattr(const struct nlattr *attr, const struct nlattr *flow_attrs, size_t flow_attr_len, - const struct tun_metadata *flow_metadata, - struct tun_metadata *metadata); -int tun_metadata_from_geneve_header(const struct geneve_opt *, int opt_len, - struct tun_metadata *metadata); - -void tun_metadata_to_geneve_nlattr_flow(const struct tun_metadata *flow, - struct ofpbuf *); -void tun_metadata_to_geneve_nlattr_mask(const struct ofpbuf *key, - const struct tun_metadata *mask, - const struct tun_metadata *flow, - struct ofpbuf *); -int tun_metadata_to_geneve_header(const struct tun_metadata *flow, + const struct flow_tnl *flow_tun, + bool udpif, struct flow_tnl *tun); +void tun_metadata_to_geneve_nlattr(const struct flow_tnl *tun, + const struct flow_tnl *flow, + const struct ofpbuf *key, + struct ofpbuf *); + +int tun_metadata_from_geneve_udpif(const 
struct flow_tnl *flow, + const struct flow_tnl *src, + struct flow_tnl *dst); +void tun_metadata_to_geneve_udpif_mask(const struct flow_tnl *flow_src, + const struct flow_tnl *mask_src, + const struct geneve_opt *flow_src_opt, + int opts_len, struct geneve_opt *dst); + +int tun_metadata_to_geneve_header(const struct flow_tnl *flow, struct geneve_opt *, bool *crit_opt); void tun_metadata_to_nx_match(struct ofpbuf *b, enum ofp_version oxm, diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c index e54d3fbd3f0..185addf0981 100644 --- a/ofproto/ofproto-dpif-sflow.c +++ b/ofproto/ofproto-dpif-sflow.c @@ -972,7 +972,7 @@ sflow_read_set_action(const struct nlattr *attr, /* Do not handle multi-encap for now. */ sflow_actions->tunnel_err = true; } else { - if (odp_tun_key_from_attr(attr, &sflow_actions->tunnel) + if (odp_tun_key_from_attr(attr, false, &sflow_actions->tunnel) == ODP_FIT_ERROR) { /* Tunnel parsing error. */ sflow_actions->tunnel_err = true; diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 59010c2d4e9..0f2e1860c6a 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -1166,7 +1166,7 @@ process_upcall(struct udpif *udpif, struct upcall *upcall, memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.ipfix); if (upcall->out_tun_key) { - odp_tun_key_from_attr(upcall->out_tun_key, + odp_tun_key_from_attr(upcall->out_tun_key, false, &output_tunnel_key); } dpif_ipfix_bridge_sample(upcall->ipfix, packet, flow, diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at index bd95c8e5094..0f1724a9cc5 100644 --- a/tests/tunnel-push-pop.at +++ b/tests/tunnel-push-pop.at @@ -132,7 +132,7 @@ AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 5'], [0], [dnl port 5: rx pkts=1, bytes=98, drop=0, errs=0, frame=0, over=0, crc=0 ]) AT_CHECK([ovs-appctl dpif/dump-flows int-br], [0], [dnl 
-tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,ttl=64,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}),flags(-df-csum+key)),skb_mark(0),recirc_id(0),in_port(6081),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:drop +tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,ttl=64,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),skb_mark(0),recirc_id(0),in_port(6081),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:drop ]) OVS_VSWITCHD_STOP