[3/3] Spec-compliant CASE eviction policy algorithm #19903

Merged

Changes from 4 commits
91 changes: 85 additions & 6 deletions src/controller/python/test/test_scripts/base.py
@@ -370,25 +370,104 @@ async def TestCaseEviction(self, nodeid: int):
# of eviction, just that allocation and CASE session establishment proceed successfully on both
# the controller and target.
#
for x in range(minimumSupportedFabrics * minimumCASESessionsPerFabric * 3):
for x in range(minimumSupportedFabrics * minimumCASESessionsPerFabric * 2):
self.devCtrl.CloseSession(nodeid)
await self.devCtrl.ReadAttribute(nodeid, [(Clusters.Basic.Attributes.ClusterRevision)])

self.logger.info("Testing CASE defunct logic")

#
# This tests establishing a subscription on a given CASE session, then mark it defunct (to simulate
# This test establishes a subscription on a given CASE session, then marks it defunct (to simulate
# encountering a transport timeout on the session).
#
# At the max interval, we should still have a valid subscription.
# Then, we write from a *different* fabric to the attribute that was subscribed to, and check to ensure we
# still get a report on the sub we established previously. Since the session was just marked defunct, it should
# return to being active and a report should get delivered.
#
sawValueChange = False

def OnValueChange(path: Attribute.TypedAttributePath, transaction: Attribute.SubscriptionTransaction) -> None:
nonlocal sawValueChange
self.logger.info("Saw value change!")
if (path.AttributeType == Clusters.TestCluster.Attributes.Int8u and path.Path.EndpointId == 1):
sawValueChange = True

self.logger.info("Testing CASE defunct logic")

sub = await self.devCtrl.ReadAttribute(nodeid, [(Clusters.TestCluster.Attributes.Int8u)], reportInterval=(0, 1))
sub.SetAttributeUpdateCallback(OnValueChange)

#
# This marks the session defunct.
#
sub = await self.devCtrl.ReadAttribute(nodeid, [(Clusters.Basic.Attributes.ClusterRevision)], reportInterval=(0, 2))
await asyncio.sleep(2)
self.devCtrl.CloseSession(nodeid)
await asyncio.sleep(4)

#
# Now write the attribute from fabric2, give it some time before checking if the report
# was received.
#
await self.devCtrl2.WriteAttribute(nodeid, [(1, Clusters.TestCluster.Attributes.Int8u(4))])
time.sleep(2)

sub.Shutdown()

if sawValueChange is False:
self.logger.error("Didn't see value change in time, likely because sub got terminated due to unexpected session eviction!")
return False

#
# In this test, we're going to set up a subscription on fabric1 through devCtrl, then constantly keep
# evicting sessions on fabric2 (devCtrl2) by cycling through closing sessions followed by issuing a Read. This
# should result in evictions on the server on fabric2, but not affect any sessions on fabric1. To test this,
# we're going to set up a subscription to an attribute prior to the cycling reads, and check at the end of the
# test that it's still valid by writing to an attribute from a *different* fabric, and validating that we see
# the change on the established subscription. That proves the session from fabric1 is still valid and untouched.
#
self.logger.info("Testing fabric-isolated CASE eviction")

sawValueChange = False
sub = await self.devCtrl.ReadAttribute(nodeid, [(Clusters.TestCluster.Attributes.Int8u)], reportInterval=(0, 1))
sub.SetAttributeUpdateCallback(OnValueChange)

for x in range(minimumSupportedFabrics * minimumCASESessionsPerFabric * 2):
self.devCtrl2.CloseSession(nodeid)
await self.devCtrl2.ReadAttribute(nodeid, [(Clusters.Basic.Attributes.ClusterRevision)])

#
# Now write the attribute from fabric2, give it some time before checking if the report
# was received.
#
await self.devCtrl2.WriteAttribute(nodeid, [(1, Clusters.TestCluster.Attributes.Int8u(4))])
time.sleep(2)

sub.Shutdown()

if sawValueChange is False:
self.logger.error("Didn't see value change in time, likely because sub got terminated due to other fabric (fabric1)")
return False

#
# Do the same test again, but reversing the roles of fabric1 and fabric2.
#
self.logger.info("Testing fabric-isolated CASE eviction (reverse)")

sawValueChange = False
sub = await self.devCtrl2.ReadAttribute(nodeid, [(Clusters.TestCluster.Attributes.Int8u)], reportInterval=(0, 1))
sub.SetAttributeUpdateCallback(OnValueChange)

for x in range(minimumSupportedFabrics * minimumCASESessionsPerFabric * 2):
self.devCtrl.CloseSession(nodeid)
await self.devCtrl.ReadAttribute(nodeid, [(Clusters.Basic.Attributes.ClusterRevision)])

await self.devCtrl.WriteAttribute(nodeid, [(1, Clusters.TestCluster.Attributes.Int8u(4))])
time.sleep(2)

sub.Shutdown()

if sawValueChange is False:
self.logger.error("Didn't see value change in time, likely because sub got terminated due to other fabric (fabric2)")
return False

return True

async def TestMultiFabric(self, ip: str, setuppin: int, nodeid: int):
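The three checks above (defunct-session recovery, fabric-isolated eviction, and its reverse) share one pattern: churn CASE sessions on one fabric while proving that a subscription held on the other fabric still delivers reports. A condensed sketch of that pattern, assuming the same devCtrl/devCtrl2-style controllers and chip.clusters bindings the test uses (illustrative only, not part of the diff):

    import time
    import chip.clusters as Clusters

    async def stress_and_verify(subCtrl, churnCtrl, nodeid, cycles):
        saw_value_change = False

        def on_change(path, transaction):
            nonlocal saw_value_change
            saw_value_change = True

        # Subscription whose underlying session must survive the other fabric's churn.
        sub = await subCtrl.ReadAttribute(nodeid, [(Clusters.TestCluster.Attributes.Int8u)],
                                          reportInterval=(0, 1))
        sub.SetAttributeUpdateCallback(on_change)

        # Each close + read forces a fresh CASE establishment on the churning
        # fabric, eventually triggering evictions in the target's session table.
        for _ in range(cycles):
            churnCtrl.CloseSession(nodeid)
            await churnCtrl.ReadAttribute(nodeid, [(Clusters.Basic.Attributes.ClusterRevision)])

        # A write from the churning fabric should still produce a report on the
        # surviving subscription; allow a short grace period, as the test does.
        await churnCtrl.WriteAttribute(nodeid, [(1, Clusters.TestCluster.Attributes.Int8u(4))])
        time.sleep(2)
        sub.Shutdown()
        return saw_value_change
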
16 changes: 3 additions & 13 deletions src/controller/python/test/test_scripts/mobile-device-test.py
@@ -76,24 +76,14 @@ def ethernet_commissioning(test: BaseTestHelper, discriminator: int, setup_pin:
nodeid=device_nodeid),
"Failed to finish key exchange")

#
# Run this before the MultiFabric test, since it will result in the resultant CASE session
# on fabric2 to be evicted (due to the stressful nature of that test) on the target.
#
# It doesn't actually evict the CASE session for fabric2 on the client, since we prioritize
# defunct sessions for eviction first, which means our CASE session on fabric2 remains preserved
# throughout the stress test. This results in a mis-match later.
#
# TODO: Once we implement fabric-adjusted LRU, we should see if this issue remains (it shouldn't)
#
logger.info("Testing CASE Eviction")
FailIfNot(asyncio.run(test.TestCaseEviction(device_nodeid)), "Failed TestCaseEviction")

ok = asyncio.run(test.TestMultiFabric(ip=address,
setuppin=20202021,
nodeid=1))
FailIfNot(ok, "Failed to commission multi-fabric")

logger.info("Testing CASE Eviction")
FailIfNot(asyncio.run(test.TestCaseEviction(device_nodeid)), "Failed TestCaseEviction")

logger.info("Testing closing sessions")
FailIfNot(test.TestCloseSession(nodeid=device_nodeid), "Failed to close sessions")

2 changes: 2 additions & 0 deletions src/transport/SecureSession.h
@@ -302,6 +302,8 @@ class SecureSession : public Session, public ReferenceCounted<SecureSession, Sec
void MoveToState(State targetState);

friend class SecureSessionDeleter;
friend class TestSecureSessionTable;

SecureSessionTable & mTable;
State mState;
const Type mSecureSessionType;
144 changes: 123 additions & 21 deletions src/transport/SecureSessionTable.cpp
@@ -71,7 +71,7 @@ Optional<SessionHandle> SecureSessionTable::CreateNewSecureSession(SecureSession
// to run the eviction algorithm to get a free slot. We shall ALWAYS be guaranteed to evict
// an existing session in the table in normal operating circumstances.
//
if (mEntries.Allocated() < CHIP_CONFIG_SECURE_SESSION_POOL_SIZE)
if (mEntries.Allocated() < GetMaxSessionTableSize())
{
allocated = mEntries.CreateObject(*this, secureSessionType, sessionId.Value());
}
@@ -102,47 +102,92 @@ SecureSession * SecureSessionTable::EvictAndAllocate(uint16_t localSessionId, Se
ChipLogProgress(SecureChannel, "Evicting a slot for session with LSID: %d, type: %u", localSessionId,
(uint8_t) secureSessionType);

VerifyOrDie(mEntries.Allocated() <= CHIP_CONFIG_SECURE_SESSION_POOL_SIZE);
VerifyOrDie(mEntries.Allocated() <= GetMaxSessionTableSize());

//
// Create a temporary list of objects each of which points to a session in the existing
// session table, but are swappable. This allows them to then be used with a sorting algorithm
// without affecting the sessions in the table itself.
//
Platform::ScopedMemoryBufferWithSize<SortableSession> sortableSessions;
sortableSessions.Calloc(mEntries.Allocated());
if (!sortableSessions)
{
VerifyOrDieWithMsg(false, SecureChannel, "We couldn't allocate a session!");
return nullptr;
}
// The size of this shouldn't place significant demands on the stack if using the default
// configuration for CHIP_CONFIG_SECURE_SESSION_POOL_SIZE (17). Each item is
// 8 bytes in size (on a 32-bit platform), and 16 bytes in size (on a 64-bit platform,
// including padding).
//
// Total size of this stack variable = 17 * 8 = 136 bytes (32-bit platform), 17 * 16 = 272 bytes (64-bit platform).
//
// Even if the define is set to a larger value, the extra stack cost is likely acceptable on the
// sort of platform that would be configured with a pool that large.
//
// We need to sort (as opposed to just a linear search for the smallest/largest item)
// since it is possible that the candidate selected for eviction may not actually be
// released once marked for expiration (see comments below for more details).
//
// Consequently, we may need to walk the candidate list until we find one that is.
// Sorting provides a better overall performance model in this scheme.
//
// (#19967): Investigate doing linear search instead.
//
SortableSession sortableSessions[CHIP_CONFIG_SECURE_SESSION_POOL_SIZE];

unsigned int index = 0;

//
// Compute two key stats for each session - the number of other sessions that
// match its fabric, as well as the number of other sessions that match its peer.
//
// This will be used by the session eviction algorithm later.
//
ForEachSession([&index, &sortableSessions, this](auto * session) {
sortableSessions[index].mSession = session;
sortableSessions[index].mNumMatchingOnFabric = 0;
sortableSessions[index].mNumMatchingOnPeer = 0;

ForEachSession([session, index, &sortableSessions](auto * otherSession) {
if (session != otherSession)
{
if (session->GetFabricIndex() == otherSession->GetFabricIndex())
{
sortableSessions[index].mNumMatchingOnFabric++;

if (session->GetPeerNodeId() == otherSession->GetPeerNodeId())
{
sortableSessions[index].mNumMatchingOnPeer++;
}
}
}

return Loop::Continue;
});

int index = 0;
ForEachSession([&index, &sortableSessions](auto session) {
sortableSessions.Get()[index].mSession = session;
index++;
return Loop::Continue;
});

auto sortableSessionSpan = Span<SortableSession>(sortableSessions.Get(), mEntries.Allocated());
auto sortableSessionSpan = Span<SortableSession>(sortableSessions, mEntries.Allocated());
EvictionPolicyContext policyContext(sortableSessionSpan, sessionEvictionHint);

DefaultEvictionPolicy(policyContext);
ChipLogProgress(SecureChannel, "Sorted sessions for eviction...");

auto numSessions = mEntries.Allocated();
const auto numSessions = mEntries.Allocated();

#if CHIP_DETAIL_LOGGING
ChipLogDetail(SecureChannel, "Sorted Eviction Candidates (ranked from best candidate to worst):");
for (auto * session = sortableSessions.Get(); session != (sortableSessions.Get() + numSessions); session++)
for (auto * session = sortableSessions; session != (sortableSessions + numSessions); session++)
{
ChipLogDetail(SecureChannel, "\t%ld: [%p] -- State: '%s', ActivityTime: %lu",
static_cast<long int>(session - sortableSessions.Get()), session->mSession, session->mSession->GetStateStr(),
ChipLogDetail(SecureChannel,
"\t%ld: [%p] -- Peer: [%u:" ChipLogFormatX64
"] State: '%s', NumMatchingOnFabric: %d NumMatchingOnPeer: %d ActivityTime: %lu",
static_cast<long int>(session - sortableSessions), session->mSession,
session->mSession->GetPeer().GetFabricIndex(), ChipLogValueX64(session->mSession->GetPeer().GetNodeId()),
session->mSession->GetStateStr(), session->mNumMatchingOnFabric, session->mNumMatchingOnPeer,
static_cast<unsigned long>(session->mSession->GetLastActivityTime().count()));
}
#endif

for (auto * session = sortableSessions.Get(); session != (sortableSessions.Get() + numSessions); session++)
for (auto * session = sortableSessions; session != (sortableSessions + numSessions); session++)
{
if (session->mSession->IsPendingEviction())
{
@@ -183,9 +228,55 @@ SecureSession * SecureSessionTable::EvictAndAllocate(uint16_t localSessionId, Se

void SecureSessionTable::DefaultEvictionPolicy(EvictionPolicyContext & evictionContext)
{
evictionContext.Sort([](const auto & a, const auto & b) {
int aStateScore = 0, bStateScore = 0;
//
// This implements a spec-compliant sorting policy that ensures both the per-fabric session
// guarantees mandated by the spec and fairness in selecting the most appropriate session to
// evict, based on multiple criteria.
//
// See the description of this function in the header for more details on each sorting key below.
//
evictionContext.Sort([&evictionContext](const SortableSession & a, const SortableSession & b) -> bool {
//
// Sorting on Key1
//
if (a.mNumMatchingOnFabric != b.mNumMatchingOnFabric)
{
return a.mNumMatchingOnFabric > b.mNumMatchingOnFabric;
}

bool doesAMatchSessionHintFabric =
a.mSession->GetPeer().GetFabricIndex() == evictionContext.GetSessionEvictionHint().GetFabricIndex();
bool doesBMatchSessionHintFabric =
b.mSession->GetPeer().GetFabricIndex() == evictionContext.GetSessionEvictionHint().GetFabricIndex();

//
// Sorting on Key2
//
if (doesAMatchSessionHintFabric != doesBMatchSessionHintFabric)
{
return doesAMatchSessionHintFabric > doesBMatchSessionHintFabric;
}

//
// Sorting on Key3
//
if (a.mNumMatchingOnPeer != b.mNumMatchingOnPeer)
{
return a.mNumMatchingOnPeer > b.mNumMatchingOnPeer;
}

int doesAMatchSessionHint = a.mSession->GetPeer() == evictionContext.GetSessionEvictionHint();
int doesBMatchSessionHint = b.mSession->GetPeer() == evictionContext.GetSessionEvictionHint();

//
// Sorting on Key4
//
if (doesAMatchSessionHint != doesBMatchSessionHint)
{
return doesAMatchSessionHint > doesBMatchSessionHint;
}

int aStateScore = 0, bStateScore = 0;
auto assignStateScore = [](auto & score, const auto & session) {
if (session.IsDefunct())
{
@@ -204,7 +295,18 @@ void SecureSessionTable::DefaultEvictionPolicy(EvictionPolicyContext & evictionC
assignStateScore(aStateScore, *a.mSession);
assignStateScore(bStateScore, *b.mSession);

return ((aStateScore > bStateScore) ? true : (a->GetLastActivityTime() < b->GetLastActivityTime()));
//
// Sorting on Key5
//
if (aStateScore != bStateScore)
{
return (aStateScore > bStateScore);
}

//
// Sorting on Key6
//
return (a->GetLastActivityTime() < b->GetLastActivityTime());
});
}
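
The comparator above is a straight lexicographic sort over six keys. A Python model of the same ordering may make the key directions easier to see (field names are mine, and the state-score mapping defunct=2 / active=1 / otherwise 0 is an assumption, since the body of assignStateScore is truncated in this view):

    from dataclasses import dataclass

    @dataclass
    class Candidate:
        num_matching_on_fabric: int  # Key1: other sessions sharing this session's fabric
        matches_hint_fabric: bool    # Key2: on the same fabric as the eviction hint
        num_matching_on_peer: int    # Key3: other sessions to the same peer node
        matches_hint_peer: bool      # Key4: same peer (fabric + node id) as the hint
        state_score: int             # Key5: assumed defunct=2, active=1, otherwise 0
        last_activity: int           # Key6: last-activity timestamp

    def eviction_rank(c: Candidate):
        # sort() is ascending, so every "larger means evict sooner" key is negated;
        # the best eviction candidate then lands at index 0, as in the C++ sort.
        return (-c.num_matching_on_fabric, -c.matches_hint_fabric,
                -c.num_matching_on_peer, -c.matches_hint_peer,
                -c.state_score, c.last_activity)

    sessions = [
        Candidate(3, True, 1, False, 1, 100),  # busy fabric, active
        Candidate(3, True, 1, False, 2, 500),  # busy fabric, defunct
        Candidate(0, False, 0, False, 1, 10),  # lone session on its fabric
    ]
    # The defunct session on the busiest fabric ranks first; the lone session
    # on its own fabric is protected and ranks last.
    print(sorted(sessions, key=eviction_rank))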

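Upstream of this policy, EvictAndAllocate walks the ranked list rather than taking only the first element, because (per the comments in that function) a candidate marked for eviction may be held alive by outstanding references and thus not actually free its slot. A minimal sketch of that walk, with hypothetical method names standing in for the SecureSession API:

    def evict_one_slot(ranked_sessions):
        for s in ranked_sessions:
            if s.is_pending_eviction():
                # Already being torn down; marking it again frees nothing new.
                continue
            s.mark_for_eviction()
            if s.is_released():
                # No outstanding reference kept it alive, so its slot is free.
                return s
        # Normal operation guarantees some candidate is evictable (see the
        # comment in CreateNewSecureSession above).
        return None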