diff --git a/NEWS b/NEWS index 9f45b5647b..e8d89f528b 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,8 @@ OVN v24.09.3 - xx xxx xxxx -------------------------- + - Improved handling of IPv6 traffic by enabling address prefix tracking + in OVS for both IPv4 and IPv6 addresses, whenever possible, reducing + the amount of IPv6 datapath flows. OVN v24.09.2 - 21 Jan 2025 -------------------------- diff --git a/TODO.rst b/TODO.rst index 17d539ad68..01370e38bc 100644 --- a/TODO.rst +++ b/TODO.rst @@ -130,3 +130,6 @@ OVN To-do List * Move the lflow build parallel processing from northd.c to lflow-mgr.c This would also ensure that the variables declared in northd.c (eg. thread_lflow_counter) are not externed in lflow-mgr.c. + +* Remove OVS database schema parsing from features.c once server side column + types are available through IDL. diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 2602e977c0..8a6b2a62d8 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -493,6 +493,76 @@ create_br_datapath(struct ovsdb_idl_txn *ovs_idl_txn, return dp; } +#define N_FLOW_TABLES 255 + +static void +update_flow_table_prefixes(struct ovsdb_idl_txn *ovs_idl_txn, + const struct ovsrec_bridge *br_int) +{ + size_t max_prefixes = ovs_features_max_flow_table_prefixes_get(); + struct ds ds = DS_EMPTY_INITIALIZER; + const char *prefixes[] = { + "ip_src", "ip_dst", "ipv6_src", "ipv6_dst", + }; + struct ovsrec_flow_table *ft; + size_t i; + + /* We must not attempt setting more prefixes than our IDL supports. + * Note: This should be a build time assertion, but IDL structures + * are not defined as constants. */ + ovs_assert( + ARRAY_SIZE(prefixes) <= + ovsrec_flow_table_columns[OVSREC_FLOW_TABLE_COL_PREFIXES].type.n_max); + + if (!max_prefixes) { + /* Not discovered yet. */ + return; + } + + max_prefixes = MIN(max_prefixes, ARRAY_SIZE(prefixes)); + if (br_int->n_flow_tables == N_FLOW_TABLES && + br_int->value_flow_tables[0]->n_prefixes == max_prefixes) { + /* Already up to date. Ideally, we would check every table, + * but it seems excessive. */ + return; + } + + for (i = 1; i < br_int->n_flow_tables; i++) { + if (br_int->value_flow_tables[i] != br_int->value_flow_tables[0]) { + break; + } + } + if (i == N_FLOW_TABLES) { + /* Correct number of flow tables and all pointing to the same row. */ + ft = br_int->value_flow_tables[0]; + } else { + /* Unexpected configuration. Let's create a new flow table row. + * Old ones will be garbage collected by the database. */ + struct ovsrec_flow_table *values[N_FLOW_TABLES]; + int64_t keys[N_FLOW_TABLES]; + + ft = ovsrec_flow_table_insert(ovs_idl_txn); + for (i = 0; i < ARRAY_SIZE(values); i++) { + keys[i] = i; + values[i] = ft; + } + ovsrec_bridge_set_flow_tables(br_int, keys, values, + ARRAY_SIZE(values)); + } + + ds_put_cstr(&ds, "Setting flow table prefixes:"); + for (i = 0 ; i < max_prefixes; i++) { + ds_put_char(&ds, ' '); + ds_put_cstr(&ds, prefixes[i]); + ds_put_char(&ds, ','); + } + ds_chomp(&ds, ','); + VLOG_INFO("%s.", ds_cstr_ro(&ds)); + ds_destroy(&ds); + + ovsrec_flow_table_set_prefixes(ft, prefixes, max_prefixes); +} + static const struct ovsrec_bridge * get_br_int(const struct ovsrec_bridge_table *bridge_table, const struct ovsrec_open_vswitch_table *ovs_table) @@ -569,6 +639,8 @@ process_br_int(struct ovsdb_idl_txn *ovs_idl_txn, datapath_type); } } + + update_flow_table_prefixes(ovs_idl_txn, br_int); } } *br_int_ = br_int; @@ -789,8 +861,11 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl) ovsdb_idl_add_column(ovs_idl, &ovsrec_bridge_col_ports); ovsdb_idl_add_column(ovs_idl, &ovsrec_bridge_col_name); ovsdb_idl_add_column(ovs_idl, &ovsrec_bridge_col_fail_mode); + ovsdb_idl_add_column(ovs_idl, &ovsrec_bridge_col_flow_tables); ovsdb_idl_add_column(ovs_idl, &ovsrec_bridge_col_other_config); ovsdb_idl_add_column(ovs_idl, &ovsrec_bridge_col_external_ids); + ovsdb_idl_add_table(ovs_idl, &ovsrec_table_flow_table); + ovsdb_idl_add_column(ovs_idl, &ovsrec_flow_table_col_prefixes); ovsdb_idl_add_table(ovs_idl, &ovsrec_table_ssl); ovsdb_idl_add_column(ovs_idl, &ovsrec_ssl_col_bootstrap_ca_cert); ovsdb_idl_add_column(ovs_idl, &ovsrec_ssl_col_ca_cert); @@ -5519,7 +5594,8 @@ main(int argc, char *argv[]) && ovs_feature_support_run(br_int_dp ? &br_int_dp->capabilities : NULL, br_int_remote.target, - br_int_remote.probe_interval)) { + br_int_remote.probe_interval, + ovs_remote)) { VLOG_INFO("OVS feature set changed, force recompute."); engine_set_force_recompute(true); diff --git a/include/ovn/features.h b/include/ovn/features.h index 3566ab60ff..95f3704ca4 100644 --- a/include/ovn/features.h +++ b/include/ovn/features.h @@ -56,9 +56,11 @@ enum ovs_feature_value { void ovs_feature_support_destroy(void); bool ovs_feature_is_supported(enum ovs_feature_value feature); bool ovs_feature_support_run(const struct smap *ovs_capabilities, - const char *conn_target, int probe_interval); + const char *conn_target, int probe_interval, + const char *db_target); bool ovs_feature_set_discovered(void); uint32_t ovs_feature_max_meters_get(void); uint32_t ovs_feature_max_select_groups_get(void); +size_t ovs_features_max_flow_table_prefixes_get(void); #endif diff --git a/lib/features.c b/lib/features.c index 98d56602c4..7302599ac2 100644 --- a/lib/features.c +++ b/lib/features.c @@ -20,6 +20,9 @@ #include "lib/util.h" #include "lib/dirs.h" #include "socket-util.h" +#include "lib/ovsdb-cs.h" +#include "lib/ovsdb-error.h" +#include "lib/ovsdb-types.h" #include "lib/vswitch-idl.h" #include "odp-netlink.h" #include "openvswitch/vlog.h" @@ -418,17 +421,99 @@ ovs_feature_get_openflow_cap(void) return ret; } +/* Monitoring how many prefixes flow tables can have. + * TODO: Remove this code once the server column type is available via IDL. */ +static struct ovsdb_cs *vswitch_cs; +static size_t max_flow_table_prefixes; + +static struct json * +vswitch_schema_prefixes_find(const struct json *schema_json, + void *ctx OVS_UNUSED) +{ + /* We must return a monitor request object, but we do not actually want + * to monitor the data, so returning an empty one. */ + struct json *monitor_request = json_object_create(); + struct ovsdb_idl_column *prefixes_col; + + prefixes_col = &ovsrec_flow_table_columns[OVSREC_FLOW_TABLE_COL_PREFIXES]; + + if (schema_json->type != JSON_OBJECT) { + VLOG_WARN_RL(&rl, "OVS database schema is not a JSON object"); + return monitor_request; + } + + const char *ft_name = ovsrec_table_classes[OVSREC_TABLE_FLOW_TABLE].name; + const char *nested_objects[] = { + "tables", ft_name, "columns", prefixes_col->name, "type", + }; + const struct json *nested = schema_json; + + for (size_t i = 0; i < ARRAY_SIZE(nested_objects); i++) { + nested = shash_find_data(json_object(nested), nested_objects[i]); + if (!nested || nested->type != JSON_OBJECT) { + VLOG_WARN_RL(&rl, "OVS database schema has no valid '%s'.", + nested_objects[i]); + return monitor_request; + } + } + + struct ovsdb_type server_type; + struct ovsdb_error *error; + + error = ovsdb_type_from_json(&server_type, nested); + if (error) { + char *msg = ovsdb_error_to_string_free(error); + + VLOG_WARN_RL(&rl, "Failed to parse type of %s column in %s table: %s", + prefixes_col->name, ft_name, msg); + free(msg); + return monitor_request; + } + + if (max_flow_table_prefixes != server_type.n_max) { + VLOG_INFO_RL(&rl, "OVS DB schema supports %d flow table prefixes, " + "our IDL supports: %d", + server_type.n_max, prefixes_col->type.n_max); + max_flow_table_prefixes = server_type.n_max; + } + + return monitor_request; +} + +static struct ovsdb_cs_ops feature_cs_ops = { + .compose_monitor_requests = vswitch_schema_prefixes_find, +}; + +static void +vswitch_schema_prefixes_run(void) +{ + struct ovsdb_cs_event *event; + struct ovs_list events; + + ovsdb_cs_run(vswitch_cs, &events); + LIST_FOR_EACH_POP (event, list_node, &events) { + /* We're not really interested in events. We only care about the + * monitor request callback being invoked on re-connections or + * schema changes. */ + ovsdb_cs_event_destroy(event); + } + ovsdb_cs_wait(vswitch_cs); +} + void ovs_feature_support_destroy(void) { rconn_destroy(swconn); swconn = NULL; + ovsdb_cs_destroy(vswitch_cs); + vswitch_cs = NULL; } /* Returns 'true' if the set of tracked OVS features has been updated. */ bool ovs_feature_support_run(const struct smap *ovs_capabilities, - const char *conn_target, int probe_interval) + const char *conn_target, int probe_interval, + const char *db_target) { static struct smap empty_caps = SMAP_INITIALIZER(&empty_caps); @@ -449,6 +534,16 @@ ovs_feature_support_run(const struct smap *ovs_capabilities, updated |= handle_feature_state_update(new_value, feature->value, feature->name); } + + if (!vswitch_cs) { + vswitch_cs = ovsdb_cs_create(ovsrec_idl_class.database, 3, + &feature_cs_ops, NULL); + } + ovsdb_cs_set_remote(vswitch_cs, db_target, true); + /* This feature doesn't affect OpenFlow rules, so not touching 'updated' + * even if changed. */ + vswitch_schema_prefixes_run(); + return updated; } @@ -479,3 +574,11 @@ ovs_feature_max_select_groups_get(void) { return ovs_group_features.max_groups[OFPGT11_SELECT]; } + +/* Returns the number of flow table prefixes that can be configured in + * the OVS database. */ +size_t +ovs_features_max_flow_table_prefixes_get(void) +{ + return max_flow_table_prefixes; +} diff --git a/lib/test-ovn-features.c b/lib/test-ovn-features.c index cddeae7791..40fba6a439 100644 --- a/lib/test-ovn-features.c +++ b/lib/test-ovn-features.c @@ -26,15 +26,15 @@ test_ovn_features(struct ovs_cmdl_context *ctx OVS_UNUSED) struct smap features = SMAP_INITIALIZER(&features); smap_add(&features, "ct_zero_snat", "false"); - ovs_assert(!ovs_feature_support_run(&features, NULL, 0)); + ovs_assert(!ovs_feature_support_run(&features, NULL, 0, NULL)); ovs_assert(!ovs_feature_is_supported(OVS_CT_ZERO_SNAT_SUPPORT)); smap_replace(&features, "ct_zero_snat", "true"); - ovs_assert(ovs_feature_support_run(&features, NULL, 0)); + ovs_assert(ovs_feature_support_run(&features, NULL, 0, NULL)); ovs_assert(ovs_feature_is_supported(OVS_CT_ZERO_SNAT_SUPPORT)); smap_add(&features, "unknown_feature", "true"); - ovs_assert(!ovs_feature_support_run(&features, NULL, 0)); + ovs_assert(!ovs_feature_support_run(&features, NULL, 0, NULL)); smap_destroy(&features); } diff --git a/ovs b/ovs index c598c05c85..9e07a69bbb 160000 --- a/ovs +++ b/ovs @@ -1 +1 @@ -Subproject commit c598c05c85b2d38874a0ce8f7f088f6aae4fdabc +Subproject commit 9e07a69bbb2a7d3eb35f9b487161c50cfa5bd65e diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at index e67ddfae53..7b392e28e2 100644 --- a/tests/ovn-controller.at +++ b/tests/ovn-controller.at @@ -3190,6 +3190,56 @@ OVS_WAIT_UNTIL([grep -q 'tcp:127.0.0.1:1235: connected' hv1/ovn-controller.log]) OVN_CLEANUP([hv1]) AT_CLEANUP +AT_SETUP([ovn-controller - br-int flow table prefixes]) +AT_KEYWORDS([ovn-controller prefixes]) +ovn_start + +net_add n1 +sim_add hv1 +ovs-vsctl add-br br-phys + +dnl Stop the database server before starting ovn-controller and convert the +dnl schema to support only 3 prefixes. +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) +AT_CHECK([sed 's/"max": 4}/"max": 3}/' $ovs_srcdir/vswitchd/vswitch.ovsschema > new_schema]) +AT_CHECK([ovsdb-tool convert hv1/conf.db new_schema]) +start_daemon ovsdb-server --remote=punix:db.sock + +dnl Start ovn-controller. +ovn_attach n1 br-phys 192.168.0.20 + +dnl Wait for exactly 3 prefixes to be configured. +OVS_WAIT_UNTIL( + [grep -q 'Setting flow table prefixes: ip_src, ip_dst, ipv6_src.' \ + hv1/ovn-controller.log]) +OVS_WAIT_FOR_OUTPUT([ovs-vsctl --columns=prefixes --bare list Flow_Table], [0], [dnl +ip_dst ip_src ipv6_src +]) + +dnl Remember the row UUID. +uuid=$(ovs-vsctl --columns=_uuid --bare list Flow_Table) + +dnl Stop the database server again and convert the schema back to support +dnl up to 4 prefixes. +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) +AT_CHECK([cat $ovs_srcdir/vswitchd/vswitch.ovsschema > new_schema]) +AT_CHECK([ovsdb-tool convert hv1/conf.db new_schema]) +start_daemon ovsdb-server --remote=punix:db.sock + +dnl Wait for exactly 4 prefixes to be configured. +OVS_WAIT_FOR_OUTPUT( + [grep -q 'Setting flow table prefixes: ip_src, ip_dst, ipv6_src, ipv6_dst.' \ + hv1/ovn-controller.log]) +OVS_WAIT_FOR_OUTPUT([ovs-vsctl --columns=prefixes --bare list Flow_Table], [0], [dnl +ip_dst ip_src ipv6_dst ipv6_src +]) + +dnl Check that the record was updated and not replaced. +AT_CHECK([test "$(ovs-vsctl --columns=_uuid --bare list Flow_Table)" = "${uuid}"]) + +OVN_CLEANUP([hv1]) +AT_CLEANUP + OVN_FOR_EACH_NORTHD([ AT_SETUP([ovn-controller - CT zone min/max boundaries]) ovn_start