Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Improve thread pool worker thread's spinning for work #13921

Merged
merged 8 commits into from
Sep 22, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -943,7 +943,12 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_ThreadPool_ForceMaxWorkerThreads, W("ThreadPoo
RETAIL_CONFIG_DWORD_INFO(INTERNAL_ThreadPool_DisableStarvationDetection, W("ThreadPool_DisableStarvationDetection"), 0, "Disables the ThreadPool feature that forces new threads to be added when workitems run for too long")
RETAIL_CONFIG_DWORD_INFO(INTERNAL_ThreadPool_DebugBreakOnWorkerStarvation, W("ThreadPool_DebugBreakOnWorkerStarvation"), 0, "Breaks into the debugger if the ThreadPool detects work queue starvation")
RETAIL_CONFIG_DWORD_INFO(INTERNAL_ThreadPool_EnableWorkerTracking, W("ThreadPool_EnableWorkerTracking"), 0, "Enables extra expensive tracking of how many workers threads are working simultaneously")
RETAIL_CONFIG_DWORD_INFO(INTERNAL_ThreadPool_UnfairSemaphoreSpinLimit, W("ThreadPool_UnfairSemaphoreSpinLimit"), 50, "Per processor limit used when calculating spin duration in UnfairSemaphore::Wait")
#ifdef _TARGET_ARM64_
// Spinning scheme is currently different on ARM64, see CLRLifoSemaphore::Wait(DWORD, UINT32, UINT32)
RETAIL_CONFIG_DWORD_INFO(INTERNAL_ThreadPool_UnfairSemaphoreSpinLimit, W("ThreadPool_UnfairSemaphoreSpinLimit"), 0x32, "Maximum number of spins per processor a thread pool worker thread performs before waiting for work")
#else // !_TARGET_ARM64_
RETAIL_CONFIG_DWORD_INFO(INTERNAL_ThreadPool_UnfairSemaphoreSpinLimit, W("ThreadPool_UnfairSemaphoreSpinLimit"), 0x46, "Maximum number of spins a thread pool worker thread performs before waiting for work")
#endif // _TARGET_ARM64_
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_Thread_UseAllCpuGroups, W("Thread_UseAllCpuGroups"), 0, "Specifies if to automatically distribute thread across CPU Groups")

CONFIG_DWORD_INFO(INTERNAL_ThreadpoolTickCountAdjustment, W("ThreadpoolTickCountAdjustment"), 0, "")
Expand Down
7 changes: 7 additions & 0 deletions src/pal/inc/pal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1464,6 +1464,13 @@ WaitForSingleObject(
IN HANDLE hHandle,
IN DWORD dwMilliseconds);

PALIMPORT
DWORD
PALAPI
PAL_WaitForSingleObjectPrioritized(
IN HANDLE hHandle,
IN DWORD dwMilliseconds);

PALIMPORT
DWORD
PALAPI
Expand Down
3 changes: 2 additions & 1 deletion src/pal/src/include/pal/corunix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,8 @@ namespace CorUnix
RegisterWaitingThread(
WaitType eWaitType,
DWORD dwIndex,
bool fAltertable
bool fAltertable,
bool fPrioritize
) = 0;

//
Expand Down
3 changes: 2 additions & 1 deletion src/pal/src/include/pal/synchobjects.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ namespace CorUnix
CONST HANDLE *lpHandles,
BOOL bWaitAll,
DWORD dwMilliseconds,
BOOL bAlertable);
BOOL bAlertable,
BOOL bPrioritize = FALSE);

PAL_ERROR InternalSleepEx(
CPalThread * pthrCurrent,
Expand Down
152 changes: 111 additions & 41 deletions src/pal/src/synchmgr/synchcontrollers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,8 @@ namespace CorUnix
PAL_ERROR CSynchWaitController::RegisterWaitingThread(
WaitType wtWaitType,
DWORD dwIndex,
bool fAlertable)
bool fAlertable,
bool fPrioritize)
{
VALIDATEOBJECT(m_psdSynchData);

Expand Down Expand Up @@ -421,12 +422,12 @@ namespace CorUnix
// Add new node to queue
if (fSharedObject)
{
m_psdSynchData->SharedWaiterEnqueue(shridNewNode);
m_psdSynchData->SharedWaiterEnqueue(shridNewNode, fPrioritize);
ptwiWaitInfo->lSharedObjCount += 1;
}
else
{
m_psdSynchData->WaiterEnqueue(pwtlnNewNode);
m_psdSynchData->WaiterEnqueue(pwtlnNewNode, fPrioritize);
}

// Succeeded: update object count
Expand Down Expand Up @@ -1821,7 +1822,7 @@ namespace CorUnix
Note: this method must be called while holding the local process
synchronization lock.
--*/
void CSynchData::WaiterEnqueue(WaitingThreadsListNode * pwtlnNewNode)
void CSynchData::WaiterEnqueue(WaitingThreadsListNode * pwtlnNewNode, bool fPrioritize)
{
VALIDATEOBJECT(this);
VALIDATEOBJECT(pwtlnNewNode);
Expand All @@ -1833,26 +1834,55 @@ namespace CorUnix
"Trying to add a WaitingThreadsListNode marked as shared "
"as it was a local one\n");

WaitingThreadsListNode * pwtlnCurrLast = m_ptrWTLTail.ptr;

pwtlnNewNode->ptrNext.ptr = NULL;
if (NULL == pwtlnCurrLast)
if (!fPrioritize)
{
_ASSERT_MSG(NULL == m_ptrWTLHead.ptr,
"Corrupted waiting list on local CSynchData @ %p\n",
this);
// Enqueue normally to the end of the queue
WaitingThreadsListNode * pwtlnCurrLast = m_ptrWTLTail.ptr;

pwtlnNewNode->ptrNext.ptr = NULL;
if (NULL == pwtlnCurrLast)
{
_ASSERT_MSG(NULL == m_ptrWTLHead.ptr,
"Corrupted waiting list on local CSynchData @ %p\n",
this);

pwtlnNewNode->ptrPrev.ptr = NULL;
m_ptrWTLHead.ptr = pwtlnNewNode;
m_ptrWTLTail.ptr = pwtlnNewNode;
pwtlnNewNode->ptrPrev.ptr = NULL;
m_ptrWTLHead.ptr = pwtlnNewNode;
m_ptrWTLTail.ptr = pwtlnNewNode;
}
else
{
VALIDATEOBJECT(pwtlnCurrLast);

pwtlnNewNode->ptrPrev.ptr = pwtlnCurrLast;
pwtlnCurrLast->ptrNext.ptr = pwtlnNewNode;
m_ptrWTLTail.ptr = pwtlnNewNode;
}
}
else
{
VALIDATEOBJECT(pwtlnCurrLast);
// The wait is prioritized, enqueue to the beginning of the queue
WaitingThreadsListNode * pwtlnCurrFirst = m_ptrWTLHead.ptr;

pwtlnNewNode->ptrPrev.ptr = NULL;
if (NULL == pwtlnCurrFirst)
{
_ASSERT_MSG(NULL == m_ptrWTLTail.ptr,
"Corrupted waiting list on local CSynchData @ %p\n",
this);

pwtlnNewNode->ptrNext.ptr = NULL;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: couldn't we always assign this to pwtlnCurrFirst before the if rather than assigning it to null here and then to that value in the else?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could be done either way, both the prev and next need to be assigned on all paths

m_ptrWTLHead.ptr = pwtlnNewNode;
m_ptrWTLTail.ptr = pwtlnNewNode;
}
else
{
VALIDATEOBJECT(pwtlnCurrFirst);

pwtlnNewNode->ptrPrev.ptr = pwtlnCurrLast;
pwtlnCurrLast->ptrNext.ptr = pwtlnNewNode;
m_ptrWTLTail.ptr = pwtlnNewNode;
pwtlnNewNode->ptrNext.ptr = pwtlnCurrFirst;
pwtlnCurrFirst->ptrPrev.ptr = pwtlnNewNode;
m_ptrWTLHead.ptr = pwtlnNewNode;
}
}

m_ulcWaitingThreads += 1;
Expand All @@ -1872,45 +1902,85 @@ namespace CorUnix
Note: this method must be called while holding both local and shared
synchronization locks.
--*/
void CSynchData::SharedWaiterEnqueue(SharedID shridNewNode)
void CSynchData::SharedWaiterEnqueue(SharedID shridNewNode, bool fPrioritize)
{
VALIDATEOBJECT(this);

_ASSERT_MSG(SharedObject == GetObjectDomain(),
"Trying to enqueue a WaitingThreadsListNode as shared "
"on a local object\n");

SharedID shridCurrLast;
WaitingThreadsListNode * pwtlnCurrLast, * pwtlnNewNode;
if (!fPrioritize)
{
// Enqueue normally to the end of the queue
SharedID shridCurrLast;
WaitingThreadsListNode * pwtlnCurrLast, * pwtlnNewNode;

shridCurrLast = m_ptrWTLTail.shrid;
pwtlnCurrLast = SharedIDToTypePointer(WaitingThreadsListNode, shridCurrLast);
pwtlnNewNode = SharedIDToTypePointer(WaitingThreadsListNode, shridNewNode);
shridCurrLast = m_ptrWTLTail.shrid;
pwtlnCurrLast = SharedIDToTypePointer(WaitingThreadsListNode, shridCurrLast);
pwtlnNewNode = SharedIDToTypePointer(WaitingThreadsListNode, shridNewNode);

_ASSERT_MSG(1 == (WTLN_FLAG_OWNER_OBJECT_IS_SHARED & pwtlnNewNode->dwFlags),
"Trying to add a WaitingThreadsListNode marked as local "
"as it was a shared one\n");
_ASSERT_MSG(1 == (WTLN_FLAG_OWNER_OBJECT_IS_SHARED & pwtlnNewNode->dwFlags),
"Trying to add a WaitingThreadsListNode marked as local "
"as it was a shared one\n");

VALIDATEOBJECT(pwtlnNewNode);
VALIDATEOBJECT(pwtlnNewNode);

pwtlnNewNode->ptrNext.shrid = NULL;
if (NULL == pwtlnCurrLast)
{
_ASSERT_MSG(NULL == m_ptrWTLHead.shrid,
"Corrupted waiting list on shared CSynchData at "
"{shrid=%p, p=%p}\n", m_shridThis, this);
pwtlnNewNode->ptrNext.shrid = NULL;
if (NULL == pwtlnCurrLast)
{
_ASSERT_MSG(NULL == m_ptrWTLHead.shrid,
"Corrupted waiting list on shared CSynchData at "
"{shrid=%p, p=%p}\n", m_shridThis, this);

pwtlnNewNode->ptrPrev.shrid = NULL;
m_ptrWTLHead.shrid = shridNewNode;
m_ptrWTLTail.shrid = shridNewNode;
pwtlnNewNode->ptrPrev.shrid = NULL;
m_ptrWTLHead.shrid = shridNewNode;
m_ptrWTLTail.shrid = shridNewNode;
}
else
{
VALIDATEOBJECT(pwtlnCurrLast);

pwtlnNewNode->ptrPrev.shrid = shridCurrLast;
pwtlnCurrLast->ptrNext.shrid = shridNewNode;
m_ptrWTLTail.shrid = shridNewNode;
}
}
else
{
VALIDATEOBJECT(pwtlnCurrLast);
// The wait is prioritized, enqueue to the beginning of the queue
SharedID shridCurrFirst;
WaitingThreadsListNode * pwtlnCurrFirst, * pwtlnNewNode;

shridCurrFirst = m_ptrWTLHead.shrid;
pwtlnCurrFirst = SharedIDToTypePointer(WaitingThreadsListNode, shridCurrFirst);
pwtlnNewNode = SharedIDToTypePointer(WaitingThreadsListNode, shridNewNode);

_ASSERT_MSG(1 == (WTLN_FLAG_OWNER_OBJECT_IS_SHARED & pwtlnNewNode->dwFlags),
"Trying to add a WaitingThreadsListNode marked as local "
"as it was a shared one\n");

VALIDATEOBJECT(pwtlnNewNode);

pwtlnNewNode->ptrPrev.shrid = NULL;
if (NULL == pwtlnCurrFirst)
{
_ASSERT_MSG(NULL == m_ptrWTLTail.shrid,
"Corrupted waiting list on shared CSynchData at "
"{shrid=%p, p=%p}\n", m_shridThis, this);

pwtlnNewNode->ptrPrev.shrid = shridCurrLast;
pwtlnCurrLast->ptrNext.shrid = shridNewNode;
m_ptrWTLTail.shrid = shridNewNode;
pwtlnNewNode->ptrNext.shrid = NULL;
m_ptrWTLHead.shrid = shridNewNode;
m_ptrWTLTail.shrid = shridNewNode;
}
else
{
VALIDATEOBJECT(pwtlnCurrFirst);

pwtlnNewNode->ptrNext.shrid = shridCurrFirst;
pwtlnCurrFirst->ptrPrev.shrid = shridNewNode;
m_ptrWTLHead.shrid = shridNewNode;
}
}

m_ulcWaitingThreads += 1;
Expand Down
2 changes: 1 addition & 1 deletion src/pal/src/synchmgr/synchmanager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3867,7 +3867,7 @@ namespace CorUnix
pwtlnNew->shridWaitingState = pwtlnOld->shridWaitingState;
pwtlnNew->ptwiWaitInfo = pwtlnOld->ptwiWaitInfo;

psdShared->SharedWaiterEnqueue(rgshridWTLNodes[i]);
psdShared->SharedWaiterEnqueue(rgshridWTLNodes[i], false);
psdShared->AddRef();

_ASSERTE(pwtlnOld = pwtlnOld->ptwiWaitInfo->rgpWTLNodes[pwtlnOld->dwObjIndex]);
Expand Down
7 changes: 4 additions & 3 deletions src/pal/src/synchmgr/synchmanager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,8 @@ namespace CorUnix
CPalThread * pthrCurrent,
CPalThread * pthrTarget);

void WaiterEnqueue(WaitingThreadsListNode * pwtlnNewNode);
void SharedWaiterEnqueue(SharedID shridNewNode);
void WaiterEnqueue(WaitingThreadsListNode * pwtlnNewNode, bool fPrioritize);
void SharedWaiterEnqueue(SharedID shridNewNode, bool fPrioritize);

// Object Domain accessor methods
ObjectDomain GetObjectDomain(void)
Expand Down Expand Up @@ -464,7 +464,8 @@ namespace CorUnix
virtual PAL_ERROR RegisterWaitingThread(
WaitType wtWaitType,
DWORD dwIndex,
bool fAlertable);
bool fAlertable,
bool fPrioritize);

virtual void ReleaseController(void);

Expand Down
35 changes: 33 additions & 2 deletions src/pal/src/synchmgr/wait.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,35 @@ WaitForSingleObject(IN HANDLE hHandle,
}


/*++
Function:
WaitForSingleObjectPrioritized

Similar to WaitForSingleObject, except uses a LIFO release policy for waiting threads by prioritizing new waiters (registering
them at the beginning of the wait queue rather than at the end).
--*/
DWORD
PALAPI
PAL_WaitForSingleObjectPrioritized(IN HANDLE hHandle,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PAL_ [](start = 0, length = 4)

why using PAL_ in the name here? I am not seeing aw are doing that in other places

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PAL tries to mimic the Windows API. For functions that are not in that category I believe the convention is to prefix with PAL_.

Copy link
Member Author

@kouvel kouvel Sep 12, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a bunch of functions that follow this convention, for example https://github.com/dotnet/coreclr/blob/master/src/pal/inc/pal.h#L5393 and others in pal.h

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PAL_WaitForSingleObjectPrioritized [](start = 0, length = 34)

Is it possible we can use WaitForSingleObject with extra parameter instead of adding one more PAL method?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to keep the WaitForSingleObject signature in sync with the Windows API

IN DWORD dwMilliseconds)
{
DWORD dwRet;

PERF_ENTRY(PAL_WaitForSingleObjectPrioritized);
ENTRY("PAL_WaitForSingleObjectPrioritized(hHandle=%p, dwMilliseconds=%u)\n",
hHandle, dwMilliseconds);

CPalThread * pThread = InternalGetCurrentThread();

dwRet = InternalWaitForMultipleObjectsEx(pThread, 1, &hHandle, FALSE,
dwMilliseconds, FALSE, TRUE /* bPrioritize */);

LOGEXIT("PAL_WaitForSingleObjectPrioritized returns DWORD %u\n", dwRet);
PERF_EXIT(PAL_WaitForSingleObjectPrioritized);
return dwRet;
}


/*++
Function:
WaitForSingleObjectEx
Expand Down Expand Up @@ -285,7 +314,8 @@ DWORD CorUnix::InternalWaitForMultipleObjectsEx(
CONST HANDLE *lpHandles,
BOOL bWaitAll,
DWORD dwMilliseconds,
BOOL bAlertable)
BOOL bAlertable,
BOOL bPrioritize)
{
DWORD dwRet = WAIT_FAILED;
PAL_ERROR palErr = NO_ERROR;
Expand Down Expand Up @@ -530,7 +560,8 @@ DWORD CorUnix::InternalWaitForMultipleObjectsEx(
palErr = ppISyncWaitCtrlrs[i]->RegisterWaitingThread(
wtWaitType,
i,
(TRUE == bAlertable));
(TRUE == bAlertable),
bPrioritize != FALSE);
if (NO_ERROR != palErr)
{
ERROR("RegisterWaitingThread() failed for %d-th object "
Expand Down
Loading