Handle dual ToR neighbor miss scenario (#2137)
- When orchagent receives a neighbor update with a zero MAC (the decision flow is sketched after this list):
    - If the neighbor IP is configured for a specific mux cable port in the MUX_CABLE table in CONFIG_DB, handle the neighbor normally (if the port is active, no action is needed; if it is standby, a tunnel route is created for the neighbor IP)
    - If the neighbor IP is not configured for a specific port, create a tunnel route for the IP pointing to the peer switch.
        - When such a neighbor IP is eventually resolved, remove the tunnel route and handle the neighbor normally.
- When creating/initializing a mux cable object, set the internal state to standby to match the constructor behavior.

- Various formatting fixes inside `test_mux.py`
- Remove references to deprecated `@pytest.yield_fixture`
- Add dual ToR neighbor miss test cases:
    - Test cases and expected results are described in `mux_neigh_miss_tests.py`. These descriptions are consumed by the generic `test_neighbor_miss` test runner, which executes the test actions and verifies the expected results
    - Various setup fixtures and test info fixtures were added
    - Existing test cases were changed to use these setup fixtures for consistency
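
The zero-MAC handling above boils down to a small decision procedure. The following is a minimal, illustrative sketch of that flow; the function name, arguments, and returned action strings are hypothetical and do not correspond to actual orchagent symbols:

```python
# Illustrative sketch of the zero-MAC neighbor handling described above.
# All names here are hypothetical; the real logic lives in neighsyncd/muxorch/neighorch.
ZERO_MAC = "00:00:00:00:00:00"

def handle_neighbor_update(ip, mac, mux_cable_neighbor_ips, standalone_tunnel_neighbors):
    """Decide what to do with a neighbor update on a dual ToR device."""
    if mac == ZERO_MAC:
        if ip in mux_cable_neighbor_ips:
            # IP belongs to a configured mux cable port: normal handling
            # (no-op if the port is active, tunnel route if it is standby).
            return "handle_normally"
        if ip not in standalone_tunnel_neighbors:
            # Unknown neighbor: point it at the peer ToR until it resolves.
            standalone_tunnel_neighbors.add(ip)
            return "create_standalone_tunnel_route"
        return "tunnel_route_already_present"
    # Non-zero MAC: the neighbor resolved, so any standalone tunnel route
    # must be removed before normal handling.
    if ip in standalone_tunnel_neighbors:
        standalone_tunnel_neighbors.discard(ip)
        return "remove_tunnel_route_then_handle_normally"
    return "handle_normally"
```

For example, the first zero-MAC update for an IP outside MUX_CABLE yields "create_standalone_tunnel_route", and a later update carrying a real MAC for the same IP yields "remove_tunnel_route_then_handle_normally".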

Signed-off-by: Lawrence Lee <lawlee@microsoft.com>
Co-authored-by: Sumukha Tumkur Vani <stumkurv@microsoft.com>
theasianpianist and Sumukha Tumkur Vani authored Feb 16, 2022
1 parent 4bff5c6 commit 081dd01
Showing 14 changed files with 756 additions and 138 deletions.
36 changes: 31 additions & 5 deletions neighsyncd/neighsync.cpp
@@ -17,9 +17,10 @@
using namespace std;
using namespace swss;

NeighSync::NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb) :
NeighSync::NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb, DBConnector *cfgDb) :
m_neighTable(pipelineAppDB, APP_NEIGH_TABLE_NAME),
m_stateNeighRestoreTable(stateDb, STATE_NEIGH_RESTORE_TABLE_NAME)
m_stateNeighRestoreTable(stateDb, STATE_NEIGH_RESTORE_TABLE_NAME),
m_cfgPeerSwitchTable(cfgDb, CFG_PEER_SWITCH_TABLE_NAME)
{
m_AppRestartAssist = new AppRestartAssist(pipelineAppDB, "neighsyncd", "swss", DEFAULT_NEIGHSYNC_WARMSTART_TIMER);
if (m_AppRestartAssist)
@@ -87,14 +88,39 @@ void NeighSync::onMsg(int nlmsg_type, struct nl_object *obj)
return;
}

std::vector<std::string> peerSwitchKeys;
bool delete_key = false;
if ((nlmsg_type == RTM_DELNEIGH) || (state == NUD_INCOMPLETE) ||
(state == NUD_FAILED))
bool use_zero_mac = false;
m_cfgPeerSwitchTable.getKeys(peerSwitchKeys);
bool is_dualtor = peerSwitchKeys.size() > 0;
if (is_dualtor && (state == NUD_INCOMPLETE || state == NUD_FAILED))
{
SWSS_LOG_INFO("Unable to resolve %s, setting zero MAC", key.c_str());
use_zero_mac = true;

// Unresolved neighbor deletion on dual ToR devices must be handled
// separately, otherwise delete_key is never set to true
// and neighorch is never able to remove the neighbor
if (nlmsg_type == RTM_DELNEIGH)
{
delete_key = true;
}
}
else if ((nlmsg_type == RTM_DELNEIGH) ||
(state == NUD_INCOMPLETE) || (state == NUD_FAILED))
{
delete_key = true;
}

nl_addr2str(rtnl_neigh_get_lladdr(neigh), macStr, MAX_ADDR_SIZE);
if (use_zero_mac)
{
std::string zero_mac = "00:00:00:00:00:00";
strncpy(macStr, zero_mac.c_str(), zero_mac.length());
}
else
{
nl_addr2str(rtnl_neigh_get_lladdr(neigh), macStr, MAX_ADDR_SIZE);
}

/* Ignore neighbor entries with Broadcast Mac - Trigger for directed broadcast */
if (!delete_key && (MacAddress(macStr) == MacAddress("ff:ff:ff:ff:ff:ff")))
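
With this change, a neighbor that cannot be resolved on a dual ToR device is still published to APPL_DB, just with an all-zero MAC, instead of being dropped. A minimal sketch of inspecting such an entry, assuming the swsscommon Python bindings are available and using illustrative interface/IP values:

```python
from swsscommon import swsscommon

appl_db = swsscommon.DBConnector("APPL_DB", 0)
neigh_table = swsscommon.Table(appl_db, "NEIGH_TABLE")

# APPL_DB NEIGH_TABLE keys look like "<interface>:<ip>"; these values are illustrative.
found, fvs = neigh_table.get("Vlan1000:192.168.0.100")
if found and dict(fvs).get("neigh") == "00:00:00:00:00:00":
    print("neighbor is unresolved; orchagent will route it toward the peer ToR")
```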
4 changes: 2 additions & 2 deletions neighsyncd/neighsync.h
@@ -23,7 +23,7 @@ class NeighSync : public NetMsg
public:
enum { MAX_ADDR_SIZE = 64 };

NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb);
NeighSync(RedisPipeline *pipelineAppDB, DBConnector *stateDb, DBConnector *cfgDb);
~NeighSync();

virtual void onMsg(int nlmsg_type, struct nl_object *obj);
@@ -36,7 +36,7 @@ class NeighSync : public NetMsg
}

private:
Table m_stateNeighRestoreTable;
Table m_stateNeighRestoreTable, m_cfgPeerSwitchTable;
ProducerStateTable m_neighTable;
AppRestartAssist *m_AppRestartAssist;
};
3 changes: 2 additions & 1 deletion neighsyncd/neighsyncd.cpp
@@ -18,8 +18,9 @@ int main(int argc, char **argv)
DBConnector appDb("APPL_DB", 0);
RedisPipeline pipelineAppDB(&appDb);
DBConnector stateDb("STATE_DB", 0);
DBConnector cfgDb("CONFIG_DB", 0);

NeighSync sync(&pipelineAppDB, &stateDb);
NeighSync sync(&pipelineAppDB, &stateDb, &cfgDb);

NetDispatcher::getInstance().registerMessageHandler(RTM_NEWNEIGH, &sync);
NetDispatcher::getInstance().registerMessageHandler(RTM_DELNEIGH, &sync);
53 changes: 53 additions & 0 deletions orchagent/muxorch.cpp
@@ -329,6 +329,7 @@ MuxCable::MuxCable(string name, IpPrefix& srv_ip4, IpPrefix& srv_ip6, IpAddress

/* Set initial state to "standby" */
stateStandby();
state_ = MuxState::MUX_STATE_STANDBY;
}

bool MuxCable::stateInitActive()
@@ -1025,6 +1026,37 @@ void MuxOrch::updateNeighbor(const NeighborUpdate& update)
return;
}

auto standalone_tunnel_neigh_it = standalone_tunnel_neighbors_.find(update.entry.ip_address);
// Handling zero MAC neighbor updates
if (!update.mac)
{
/* For neighbors that were previously resolvable but are now unresolvable,
* we expect such neighbor entries to be deleted prior to a zero MAC update
* arriving for that same neighbor.
*/

if (update.add)
{
if (standalone_tunnel_neigh_it == standalone_tunnel_neighbors_.end())
{
createStandaloneTunnelRoute(update.entry.ip_address);
}
/* If the MAC address in the neighbor entry is zero but the neighbor IP
* is already present in standalone_tunnel_neighbors_, assume we have already
* added a tunnel route for it and exit early
*/
return;
}
}
/* If the update operation for a neighbor contains a non-zero MAC, we must
* make sure to remove any existing tunnel routes to prevent conflicts.
* This block also covers the case of neighbor deletion.
*/
if (standalone_tunnel_neigh_it != standalone_tunnel_neighbors_.end())
{
removeStandaloneTunnelRoute(update.entry.ip_address);
}

for (auto it = mux_cable_tb_.begin(); it != mux_cable_tb_.end(); it++)
{
MuxCable* ptr = it->second.get();
@@ -1292,6 +1324,27 @@ bool MuxOrch::delOperation(const Request& request)
return true;
}

void MuxOrch::createStandaloneTunnelRoute(IpAddress neighborIp)
{
SWSS_LOG_INFO("Creating standalone tunnel route for neighbor %s", neighborIp.to_string().c_str());
sai_object_id_t tunnel_nexthop = getNextHopTunnelId(MUX_TUNNEL, mux_peer_switch_);
if (tunnel_nexthop == SAI_NULL_OBJECT_ID) {
SWSS_LOG_NOTICE("%s nexthop not created yet, ignoring tunnel route creation for %s", MUX_TUNNEL, neighborIp.to_string().c_str());
return;
}
IpPrefix pfx = neighborIp.to_string();
create_route(pfx, tunnel_nexthop);
standalone_tunnel_neighbors_.insert(neighborIp);
}

void MuxOrch::removeStandaloneTunnelRoute(IpAddress neighborIp)
{
SWSS_LOG_INFO("Removing standalone tunnel route for neighbor %s", neighborIp.to_string().c_str());
IpPrefix pfx = neighborIp.to_string();
remove_route(pfx);
standalone_tunnel_neighbors_.erase(neighborIp);
}

MuxCableOrch::MuxCableOrch(DBConnector *db, DBConnector *sdb, const std::string& tableName):
Orch2(db, tableName, request_),
app_tunnel_route_table_(db, APP_TUNNEL_ROUTE_TABLE_NAME),
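
The standalone tunnel route added here is programmed directly against SAI as a host route whose nexthop is the peer switch tunnel, so it shows up in ASIC_DB as a route entry. A rough sketch of checking for it, assuming the swsscommon Python bindings and an illustrative neighbor IP:

```python
import json

from swsscommon import swsscommon

asic_db = swsscommon.DBConnector("ASIC_DB", 0)
route_table = swsscommon.Table(asic_db, "ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY")

# Each key is a JSON blob containing the destination prefix; 192.168.0.100/32 is illustrative.
for key in route_table.getKeys():
    entry = json.loads(key)
    if entry.get("dest") == "192.168.0.100/32":
        print("standalone tunnel route present:", key)
```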
8 changes: 8 additions & 0 deletions orchagent/muxorch.h
@@ -196,6 +196,13 @@ class MuxOrch : public Orch2, public Observer, public Subject

bool getMuxPort(const MacAddress&, const string&, string&);

/***
* Methods for managing tunnel routes for neighbor IPs not associated
* with a specific mux cable
***/
void createStandaloneTunnelRoute(IpAddress neighborIp);
void removeStandaloneTunnelRoute(IpAddress neighborIp);

IpAddress mux_peer_switch_ = 0x0;
sai_object_id_t mux_tunnel_id_ = SAI_NULL_OBJECT_ID;

@@ -210,6 +217,7 @@
FdbOrch *fdb_orch_;

MuxCfgRequest request_;
std::set<IpAddress> standalone_tunnel_neighbors_;
};

const request_description_t mux_cable_request_description = {
20 changes: 19 additions & 1 deletion orchagent/neighorch.cpp
@@ -547,7 +547,16 @@ void NeighOrch::doTask(Consumer &consumer)
if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end()
|| m_syncdNeighbors[neighbor_entry].mac != mac_address)
{
if (addNeighbor(neighbor_entry, mac_address))
// only for unresolvable neighbors that are new
if (!mac_address)
{
if (m_syncdNeighbors.find(neighbor_entry) == m_syncdNeighbors.end())
{
addZeroMacTunnelRoute(neighbor_entry, mac_address);
}
it = consumer.m_toSync.erase(it);
}
else if (addNeighbor(neighbor_entry, mac_address))
{
it = consumer.m_toSync.erase(it);
}
@@ -954,3 +963,12 @@ bool NeighOrch::removeTunnelNextHop(const NextHopKey& nh)
return true;
}

void NeighOrch::addZeroMacTunnelRoute(const NeighborEntry& entry, const MacAddress& mac)
{
SWSS_LOG_INFO("Creating tunnel route for neighbor %s", entry.ip_address.to_string().c_str());
MuxOrch* mux_orch = gDirectory.get<MuxOrch*>();
NeighborUpdate update = {entry, mac, true};
mux_orch->update(SUBJECT_TYPE_NEIGH_CHANGE, static_cast<void *>(&update));
m_syncdNeighbors[entry] = { mac, false };
}
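
The guard above forwards a zero-MAC update to MuxOrch only the first time a neighbor entry is seen; subsequent zero-MAC updates for the same entry are simply consumed. A small illustrative model of that bookkeeping (hypothetical names, not the actual NeighOrch code):

```python
ZERO_MAC = "00:00:00:00:00:00"

def process_neigh_task(entry, mac, syncd_neighbors, notify_mux_orch, add_neighbor):
    """Model of the doTask() branch above; all names here are hypothetical."""
    if mac == ZERO_MAC:
        if entry not in syncd_neighbors:
            # New unresolvable neighbor: let MuxOrch create the tunnel route and
            # remember the entry so repeated zero-MAC updates are ignored.
            notify_mux_orch(entry, mac)
            syncd_neighbors[entry] = {"mac": mac, "hw_configured": False}
        return True  # the task is consumed either way
    return add_neighbor(entry, mac)  # resolved neighbor: normal programming
```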

2 changes: 2 additions & 0 deletions orchagent/neighorch.h
@@ -99,6 +99,8 @@ class NeighOrch : public Orch, public Subject, public Observer

bool resolveNeighborEntry(const NeighborEntry &, const MacAddress &);
void clearResolvedNeighborEntry(const NeighborEntry &);

void addZeroMacTunnelRoute(const NeighborEntry &, const MacAddress &);
};

#endif /* SWSS_NEIGHORCH_H */
16 changes: 8 additions & 8 deletions tests/conftest.py
@@ -1529,7 +1529,7 @@ def verify_vct(self):
return ret1 and ret2


@pytest.yield_fixture(scope="module")
@pytest.fixture(scope="module")
def dvs(request) -> DockerVirtualSwitch:
if sys.version_info[0] < 3:
raise NameError("Python 2 is not supported, please install python 3")
@@ -1559,7 +1559,7 @@ def dvs(request) -> DockerVirtualSwitch:
dvs.ctn_restart()


@pytest.yield_fixture(scope="module")
@pytest.fixture(scope="module")
def vct(request):
vctns = request.config.getoption("--vctns")
topo = request.config.getoption("--topo")
@@ -1579,7 +1579,7 @@
vct.destroy()


@pytest.yield_fixture
@pytest.fixture
def testlog(request, dvs):
dvs.runcmd(f"logger === start test {request.node.name} ===")
yield testlog
@@ -1602,13 +1602,13 @@ def dvs_route(request, dvs) -> DVSRoute:

# FIXME: The rest of these also need to be reverted back to normal fixtures to
# appease the linter.
@pytest.yield_fixture(scope="class")
@pytest.fixture(scope="class")
def dvs_lag_manager(request, dvs):
request.cls.dvs_lag = dvs_lag.DVSLag(dvs.get_asic_db(),
dvs.get_config_db())


@pytest.yield_fixture(scope="class")
@pytest.fixture(scope="class")
def dvs_vlan_manager(request, dvs):
request.cls.dvs_vlan = dvs_vlan.DVSVlan(dvs.get_asic_db(),
dvs.get_config_db(),
@@ -1617,7 +1617,7 @@ def dvs_vlan_manager(request, dvs):
dvs.get_app_db())


@pytest.yield_fixture(scope="class")
@pytest.fixture(scope="class")
def dvs_mirror_manager(request, dvs):
request.cls.dvs_mirror = dvs_mirror.DVSMirror(dvs.get_asic_db(),
dvs.get_config_db(),
@@ -1626,7 +1626,7 @@ def dvs_mirror_manager(request, dvs):
dvs.get_app_db())


@pytest.yield_fixture(scope="class")
@pytest.fixture(scope="class")
def dvs_policer_manager(request, dvs):
request.cls.dvs_policer = dvs_policer.DVSPolicer(dvs.get_asic_db(),
dvs.get_config_db())
@@ -1647,7 +1647,7 @@ def remove_dpb_config_file(dvs):
dvs.runcmd(cmd)


@pytest.yield_fixture(scope="module")
@pytest.fixture(scope="module")
def dpb_setup_fixture(dvs):
create_dpb_config_file(dvs)
if dvs.vct is None:
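
`@pytest.yield_fixture` has been deprecated since pytest 3.0; a plain `@pytest.fixture` supports `yield`-style setup and teardown directly, which is all these fixtures need. A minimal sketch of the pattern:

```python
import pytest

@pytest.fixture(scope="module")
def example_resource():
    resource = {"ready": True}  # set up (illustrative placeholder)
    yield resource              # hand the resource to the test
    resource.clear()            # tear down after the test finishes
```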