Skip to content

Commit

Permalink
Add more debug information when PFC WD is triggered (#2858)
Browse files Browse the repository at this point in the history
Add more debug information when PFC WD is triggered
  • Loading branch information
stephenxs authored Oct 30, 2023
1 parent a9867e6 commit 917c21e
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 12 deletions.
18 changes: 17 additions & 1 deletion orchagent/pfc_detect_mellanox.lua
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,17 @@ local rets = {}

redis.call('SELECT', counters_db)

-- Record the polling time.
-- Read the timestamp stored by the previous run BEFORE it is overwritten by
-- the HSET below; the code further down treats a `false` result as "no
-- previous timestamp has been stored".
local timestamp_last = redis.call('HGET', 'TIMESTAMP', 'pfcwd_poll_timestamp_last')
-- Fold the two elements returned by TIME into a single number: element 1 plus
-- element 2 divided by 1000000 (presumably seconds + microseconds -- confirm
-- against the Redis TIME documentation).
local timestamp_struct = redis.call('TIME')
local timestamp_current = timestamp_struct[1] + timestamp_struct[2] / 1000000
-- NOTE(review): tostring() on a number may not preserve every fractional
-- digit, so the stored value could be slightly coarser than
-- timestamp_current -- confirm whether that matters for real_poll_time.
local timestamp_string = tostring(timestamp_current)
redis.call('HSET', 'TIMESTAMP', 'pfcwd_poll_timestamp_last', timestamp_string)
-- Start from the configured poll_time; when a previous timestamp exists,
-- replace it with the measured gap between the two runs scaled by 1000000
-- (presumably the same unit as poll_time -- verify against the caller).
local real_poll_time = poll_time
if timestamp_last ~= false then
real_poll_time = (timestamp_current - tonumber(timestamp_last)) * 1000000
end

-- Iterate through each queue
local n = table.getn(KEYS)
for i = n, 1, -1 do
Expand Down Expand Up @@ -78,7 +89,12 @@ for i = n, 1, -1 do
if time_left <= poll_time then
redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last')
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]')
-- Build the extra debug fields that are appended to the "storm" notification
-- as a flat list of "name","value" string pairs.
-- Every value goes through tostring() so that a non-string value can never
-- abort the script while the payload is being concatenated.
local occupancy_string = '"occupancy","' .. tostring(occupancy_bytes) .. '",'
local packets_string = '"packets","' .. tostring(packets) .. '","packets_last","' .. tostring(packets_last) .. '",'
local pfc_rx_packets_string = '"pfc_rx_packets","' .. tostring(pfc_rx_packets) .. '","pfc_rx_packets_last","' .. tostring(pfc_rx_packets_last) .. '",'
local storm_condition_string = '"pfc_duration","' .. tostring(pfc_duration) .. '","pfc_duration_last","' .. tostring(pfc_duration_last) .. '",'
-- timestamp_last is the raw HGET result and is `false` when no previous poll
-- timestamp was stored (the script checks `timestamp_last ~= false` before
-- computing real_poll_time). Concatenating a boolean with `..` raises a Lua
-- error, which would stop the script before the PUBLISH below, so it must be
-- converted explicitly. When it is a string the output is unchanged.
local timestamps = '"timestamp","' .. timestamp_string .. '","timestamp_last","' .. tostring(timestamp_last) .. '","real_poll_time","' .. tostring(real_poll_time) .. '"'
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm",' .. occupancy_string .. packets_string .. pfc_rx_packets_string .. storm_condition_string .. timestamps .. ']')
is_deadlock = true
time_left = detection_time
else
Expand Down
40 changes: 32 additions & 8 deletions orchagent/pfcwdorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -913,10 +913,20 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(swss::NotificationConsumer

wdNotification.pop(queueIdStr, event, values);

// Flatten the field/value pairs popped from the notification into a single
// string of the form "field:value|field:value" so it can be handed to
// startWdActionOnQueue() as one argument.
string info;
for (auto &fv : values)
{
info += fvField(fv) + ":" + fvValue(fv) + "|";
}
// Each pair appended a trailing '|'; drop the last one. The emptiness check
// keeps pop_back() from being called when there were no pairs at all.
if (!info.empty())
{
info.pop_back();
}

sai_object_id_t queueId = SAI_NULL_OBJECT_ID;
sai_deserialize_object_id(queueIdStr, queueId);

if (!startWdActionOnQueue(event, queueId))
if (!startWdActionOnQueue(event, queueId, info))
{
SWSS_LOG_ERROR("Failed to start PFC watchdog %s event action on queue %s", event.c_str(), queueIdStr.c_str());
}
Expand All @@ -939,26 +949,40 @@ void PfcWdSwOrch<DropHandler, ForwardHandler>::doTask(SelectableTimer &timer)

template <typename DropHandler, typename ForwardHandler>
void PfcWdSwOrch<DropHandler, ForwardHandler>::report_pfc_storm(
sai_object_id_t id, const PfcWdQueueEntry *entry)
sai_object_id_t id, const PfcWdQueueEntry *entry, const string &info)
{
event_params_t params = {
{ "ifname", entry->portAlias },
{ "queue_index", to_string(entry->index) },
{ "queue_id", to_string(id) },
{ "port_id", to_string(entry->portId) }};

SWSS_LOG_NOTICE(
"PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%" PRIx64 " and port id 0x%" PRIx64 ".",
// Log the storm. When the caller supplied additional information, append it
// to the log line and also attach it to the published event parameters;
// otherwise log only the port/queue identifiers.
if (info.empty())
{
SWSS_LOG_NOTICE(
"PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%" PRIx64 " and port id 0x%" PRIx64,
entry->portAlias.c_str(),
entry->index,
id,
entry->portId);
}
else
{
SWSS_LOG_NOTICE(
"PFC Watchdog detected PFC storm on port %s, queue index %d, queue id 0x%" PRIx64 " and port id 0x%" PRIx64 ", additional info: %s.",
entry->portAlias.c_str(),
entry->index,
id,
entry->portId,
info.c_str());
// Only present in the event when there is something to report.
params["additional_info"] = info;
}

// Publish the "pfc-storm" event with the parameters collected above.
event_publish(g_events_handle, "pfc-storm", &params);
}

template <typename DropHandler, typename ForwardHandler>
bool PfcWdSwOrch<DropHandler, ForwardHandler>::startWdActionOnQueue(const string &event, sai_object_id_t queueId)
bool PfcWdSwOrch<DropHandler, ForwardHandler>::startWdActionOnQueue(const string &event, sai_object_id_t queueId, const string &info)
{
auto entry = m_entryMap.find(queueId);
if (entry == m_entryMap.end())
Expand All @@ -979,7 +1003,7 @@ bool PfcWdSwOrch<DropHandler, ForwardHandler>::startWdActionOnQueue(const string
{
if (entry->second.handler == nullptr)
{
report_pfc_storm(entry->first, &entry->second);
report_pfc_storm(entry->first, &entry->second, info);

entry->second.handler = make_shared<PfcWdActionHandler>(
entry->second.portId,
Expand All @@ -996,7 +1020,7 @@ bool PfcWdSwOrch<DropHandler, ForwardHandler>::startWdActionOnQueue(const string
{
if (entry->second.handler == nullptr)
{
report_pfc_storm(entry->first, &entry->second);
report_pfc_storm(entry->first, &entry->second, info);

entry->second.handler = make_shared<DropHandler>(
entry->second.portId,
Expand All @@ -1013,7 +1037,7 @@ bool PfcWdSwOrch<DropHandler, ForwardHandler>::startWdActionOnQueue(const string
{
if (entry->second.handler == nullptr)
{
report_pfc_storm(entry->first, &entry->second);
report_pfc_storm(entry->first, &entry->second, info);

entry->second.handler = make_shared<ForwardHandler>(
entry->second.portId,
Expand Down
6 changes: 3 additions & 3 deletions orchagent/pfcwdorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class PfcWdOrch: public Orch
void setPfcDlrPacketAction(PfcWdAction action) { PfcDlrPacketAction = action; }

protected:
virtual bool startWdActionOnQueue(const string &event, sai_object_id_t queueId) = 0;
virtual bool startWdActionOnQueue(const string &event, sai_object_id_t queueId, const string &info="") = 0;
string m_platform = "";
private:

Expand Down Expand Up @@ -96,7 +96,7 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>
void doTask() override;

protected:
bool startWdActionOnQueue(const string &event, sai_object_id_t queueId) override;
bool startWdActionOnQueue(const string &event, sai_object_id_t queueId, const string &info="") override;

private:
struct PfcWdQueueEntry
Expand Down Expand Up @@ -128,7 +128,7 @@ class PfcWdSwOrch: public PfcWdOrch<DropHandler, ForwardHandler>
void enableBigRedSwitchMode();
void setBigRedSwitchMode(string value);

void report_pfc_storm(sai_object_id_t id, const PfcWdQueueEntry *);
void report_pfc_storm(sai_object_id_t id, const PfcWdQueueEntry *, const string&);

map<sai_object_id_t, PfcWdQueueEntry> m_entryMap;
map<sai_object_id_t, PfcWdQueueEntry> m_brsEntryMap;
Expand Down

0 comments on commit 917c21e

Please sign in to comment.