From aac52632d011a998872925fae98f67bc53f7c0a3 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 3 Feb 2025 22:15:43 +0100 Subject: [PATCH] mac-cache: Fix expiration of active FDB entry due to skipped update. ovn-controller doesn't update timestamps until 3/4 of the expiration threshold have passed, but then it may wait for another 3/4. And since 3/4 + 3/4 > 1, northd would remove the entry before ovn-controller wakes up to refresh it next time. Fix the issue the same way it is fixed for MAC bindings - by using cooldown period of 1/4 and the dump period of 1/2 of the threshold. NOTE: In the worst case this will increase the number of transactions to the database to 4 per FDB entry per aging period. Fixes: 551527a5e68e ("controller: Update FDB timestamp") Reported-at: https://issues.redhat.com/browse/FDP-1132 Signed-off-by: Ilya Maximets Acked-by: Ales Musil Signed-off-by: Dumitru Ceara (cherry picked from commit aac5017907615437f3c2a56e7355bf97537c0c7d) --- controller/mac-cache.c | 59 ++++++++++++++++++++++++++---- tests/ovn.at | 83 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+), 7 deletions(-) diff --git a/controller/mac-cache.c b/controller/mac-cache.c index 229dcc3f7e..861cfa62ab 100644 --- a/controller/mac-cache.c +++ b/controller/mac-cache.c @@ -512,6 +512,34 @@ fdb_stats_process_flow_stats(struct ovs_list *stats_list, ovs_list_push_back(stats_list, &stats->list_node); } +static void +fdb_update_log(const char *action, + const struct fdb_data *data, + bool print_times, + const struct mac_cache_threshold *threshold, + int64_t idle_age_ms, uint64_t since_updated_ms) +{ + if (!VLOG_IS_DBG_ENABLED()) { + return; + } + + struct ds s = DS_EMPTY_INITIALIZER; + + ds_put_cstr(&s, action); + ds_put_format(&s, " FDB entry (datapath-key: %"PRIu32", " + "port-key: %"PRIu32", mac: " ETH_ADDR_FMT, + data->dp_key, data->port_key, ETH_ADDR_ARGS(data->mac)); + if (print_times) { + ds_put_format(&s, "), last update: %"PRIu64"ms ago," + " idle age: %"PRIi64"ms, threshold: %"PRIu64"ms", + since_updated_ms, idle_age_ms, threshold->value); + } else { + ds_put_char(&s, ')'); + } + VLOG_DBG("%s.", ds_cstr_ro(&s)); + ds_destroy(&s); +} + void fdb_stats_run(struct ovs_list *stats_list, uint64_t *req_delay, void *data) @@ -524,26 +552,43 @@ fdb_stats_run(struct ovs_list *stats_list, uint64_t *req_delay, struct fdb *fdb = fdb_find(&cache_data->fdbs, &stats->data.fdb); if (!fdb) { + fdb_update_log("Not found in the cache:", &stats->data.fdb, + false, NULL, 0, 0); free(stats); continue; } + uint64_t since_updated_ms = timewall_now - fdb->sbrec_fdb->timestamp; struct mac_cache_threshold *threshold = mac_cache_threshold_find(cache_data, fdb->data.dp_key); - /* If "idle_age" is under threshold it means that the mac binding is - * used on this chassis. Also make sure that we don't update the - * timestamp more than once during the dump period. */ - if (stats->idle_age_ms < threshold->value && - (timewall_now - fdb->sbrec_fdb->timestamp) >= - threshold->dump_period) { - sbrec_fdb_set_timestamp(fdb->sbrec_fdb, timewall_now); + /* If "idle_age" is under threshold it means that the fdb entry is + * used on this chassis. */ + if (stats->idle_age_ms < threshold->value) { + if (since_updated_ms >= threshold->cooldown_period) { + fdb_update_log("Updating active", &fdb->data, true, + threshold, stats->idle_age_ms, + since_updated_ms); + sbrec_fdb_set_timestamp(fdb->sbrec_fdb, timewall_now); + } else { + /* Postponing the update to avoid sending database transactions + * too frequently. */ + fdb_update_log("Not updating active", &fdb->data, true, + threshold, stats->idle_age_ms, + since_updated_ms); + } + } else { + fdb_update_log("Not updating non-active", &fdb->data, true, + threshold, stats->idle_age_ms, since_updated_ms); } free(stats); } mac_cache_update_req_delay(&cache_data->thresholds, req_delay); + if (*req_delay) { + VLOG_DBG("FDB entry statistics dalay: %"PRIu64, *req_delay); + } } /* Packet buffering. */ diff --git a/tests/ovn.at b/tests/ovn.at index 50031826e2..0ac74dba12 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -37163,6 +37163,7 @@ ovs-vsctl -- add-port br-phys ext0 -- \ options:rxq_pcap=hv1/ext0-rx.pcap \ ofport-request=2 ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys +ovn-appctl -t ovn-controller vlog/set mac_cache:file:dbg pinctrl:file:dbg OVN_POPULATE_ARP wait_for_ports_up @@ -37208,6 +37209,88 @@ OVN_CLEANUP([hv1]) AT_CLEANUP ]) +OVN_FOR_EACH_NORTHD([ +AT_SETUP([FDB aging - persistence of the active entry]) +AT_SKIP_IF([test $HAVE_SCAPY = no]) +ovn_start + +net_add n1 + +check ovn-nbctl ls-add ls0 + +check ovn-nbctl lsp-add ls0 ln_port -- \ + lsp-set-addresses ln_port unknown -- \ + lsp-set-type ln_port localnet -- \ + lsp-set-options ln_port network_name=physnet1 -- \ + set logical_switch_port ln_port options:localnet_learn_fdb=true -- \ + set logical_switch ls0 other_config:fdb_age_threshold=4 + +check ovn-nbctl lsp-add ls0 vif1 -- \ + lsp-set-addresses vif1 "00:00:00:00:10:10 192.168.10.10" + +sim_add hv1 +as hv1 +ovs-vsctl add-br br-underlay +ovn_attach n1 br-underlay 192.168.0.1 +ovs-vsctl add-br br-phys +ovs-vsctl -- add-port br-int vif1 -- \ + set interface vif1 external-ids:iface-id=vif1 \ + options:tx_pcap=hv1/vif1-tx.pcap \ + options:rxq_pcap=hv1/vif1-rx.pcap \ + ofport-request=1 +ovs-vsctl -- add-port br-phys ext0 -- \ + set interface ext0 \ + options:tx_pcap=hv1/ext0-tx.pcap \ + options:rxq_pcap=hv1/ext0-rx.pcap \ + ofport-request=2 +ovs-vsctl set open . external_ids:ovn-bridge-mappings=physnet1:br-phys +ovn-appctl -t ovn-controller vlog/set mac_cache:file:dbg pinctrl:file:dbg + +OVN_POPULATE_ARP +wait_for_ports_up +check ovn-nbctl --wait=hv sync + +send_packet() { + packet=$(fmt_pkt " + Ether(dst='00:00:00:00:10:10', src='00:00:00:00:10:${1}') / + IP(src='192.168.10.${1}', dst='192.168.10.10') / + UDP(sport=1234, dport=1235) + ") + check ovs-appctl netdev-dummy/receive ext0 $packet +} + +# Send packets to create FDB entries. Wait between two, so they have an +# initial difference in timestamps around half the age threshold. +send_packet 20 +wait_row_count fdb 1 mac='"00:00:00:00:10:20"' +sleep 2 +send_packet 30 +wait_row_count fdb 1 mac='"00:00:00:00:10:30"' + +timestamp=$(fetch_column fdb timestamp mac='"00:00:00:00:10:20"') + +uuid=$(fetch_column fdb _uuid mac='"00:00:00:00:10:30"') +for i in $(seq 12); do + # Keep one entry alive by sending traffic that uses it. + send_packet 30 + sleep 1 + # The entry must not expire. + check_row_count fdb 1 mac='"00:00:00:00:10:30"' + as hv1 ovs-ofctl dump-flows br-int table=OFTABLE_LOOKUP_FDB +done +# Check that it's the same entry. +uuid2=$(fetch_column fdb _uuid mac='"00:00:00:00:10:30"') +check test "$uuid" = "$uuid2" + +# The other entry must have expired by now. +wait_row_count fdb 0 mac='"00:00:00:00:10:20"' +# Wait for the alive one to expire as well. +wait_row_count fdb 0 mac='"00:00:00:00:10:30"' + +OVN_CLEANUP([hv1]) +AT_CLEANUP +]) + OVN_FOR_EACH_NORTHD([ AT_SETUP([DNAT_SNAT and LB traffic]) AT_KEYWORDS([dnat-snat-lb])