Skip to content

Commit

Permalink
189 - Generate a dump when the size of the generation exceeds thresho…
Browse files Browse the repository at this point in the history
…ld (#197)

* 189 - Generate a dump when the size of the generation exceeds threshold

* Add another GC.Collect for test
  • Loading branch information
MarioHewardt authored Jul 17, 2023
1 parent 569cb49 commit bb67c58
Show file tree
Hide file tree
Showing 13 changed files with 413 additions and 81 deletions.
14 changes: 11 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ procdump [-n Count]
[-s Seconds]
[-c|-cl CPU_Usage]
[-m|-ml Commit_Usage1[,Commit_Usage2,...]]
[-gcm Memory_Usage1[,Memory_Usage2...]]
[-gcm [<GCGeneration>: | LOH: | POH:]Memory_Usage1[,Memory_Usage2...]]
[-gcgen Generation]
[-tc Thread_Threshold]
[-fc FileDescriptor_Threshold]
Expand All @@ -46,7 +46,7 @@ Options:
-cl CPU threshold below which to create a dump of the process.
-m Memory commit threshold(s) (MB) above which to create dumps.
-ml Memory commit threshold(s) (MB) below which to create dumps.
-gcm [.NET] GC memory threshold(s) (MB) above which to create dumps.
-gcm [.NET] GC memory threshold(s) (MB) above which to create dumps for the specified generation or heap (default is total .NET memory usage).
-gcgen [.NET] Create dump when the garbage collection of the specified generation starts and finishes.
-tc Thread count threshold above which to create a dump of the process.
-fc File descriptor count threshold above which to create a dump of the process.
Expand Down Expand Up @@ -94,10 +94,18 @@ The following will create a core dump when memory usage is >= 100 MB followed by
```
sudo procdump -m 100,200 1234
```
The following will create a core dump when .NET memory usage is >= 100 MB followed by another dump when memory usage is >= 200MB.
The following will create a core dump when the total .NET memory usage is >= 100 MB followed by another dump when memory usage is >= 200 MB.
```
sudo procdump -gcm 100,200 1234
```
The following will create a core dump when .NET memory usage for generation 1 is >= 1 MB followed by another dump when memory usage is >= 2 MB.
```
sudo procdump -gcm 1:1,2 1234
```
The following will create a core dump when .NET Large Object Heap memory usage is >= 100 MB followed by another dump when memory usage is >= 200 MB.
```
sudo procdump -gcm LOH:100,200 1234
```
The following will create a core dump at the start and end of a .NET generation 1 garbage collection.
```
sudo procdump -gcgen 1
Expand Down
3 changes: 1 addition & 2 deletions include/Monitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ bool BeginMonitoring(struct ProcDumpConfiguration *self);
bool MonitorDotNet(struct ProcDumpConfiguration *self);
char* GetThresholds(struct ProcDumpConfiguration *self);
char* GetClientData(struct ProcDumpConfiguration *self, char* fullDumpPath);
char* GetClientDataInt(enum TriggerType triggerType, char* path, int value);
char* GetClientDataString(enum TriggerType triggerType, char* path, char* value);
char* GetClientDataHelper(enum TriggerType triggerType, char* path, const char* format, ...);
bool ExitProcessMonitor(struct ProcDumpConfiguration* config, pthread_t processMonitor);

// Monitor worker threads
Expand Down
3 changes: 3 additions & 0 deletions include/ProfilerCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#ifndef PROFILERCOMMON_H
#define PROFILERCOMMON_H

#define CUMULATIVE_GC_SIZE 2008
#define MAX_GC_GEN 2

enum TriggerType
{
Processor,
Expand Down
4 changes: 2 additions & 2 deletions procdump.1
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ procdump [-n Count]
[-s Seconds]
[-c|-cl CPU_Usage]
[-m|-ml Commit_Usage1[,Commit_Usage2,...]]
[-gcm Memory_Usage1[,Memory_Usage2...]]
[-gcm [<GCGeneration>: | LOH: | POH:]Memory_Usage1[,Memory_Usage2...]]
[-gcgen Generation]
[-tc Thread_Threshold]
[-fc FileDescriptor_Threshold]
Expand All @@ -28,7 +28,7 @@ Options:
-cl CPU threshold below which to create a dump of the process.
-m Memory commit thresholds (MB) above which to create dumps.
-ml Memory commit thresholds (MB) below which to create dumps.
-gcm [.NET] GC memory threshold(s) (MB) above which to create dumps.
-gcm [.NET] GC memory threshold(s) (MB) above which to create dumps for the specified generation or heap (default is total .NET memory usage).
-gcgen [.NET] Create dump when the garbage collection of the specified generation starts and finishes.
-tc Thread count threshold above which to create a dump of the process.
-fc File descriptor count threshold above which to create a dump of the process.
Expand Down
3 changes: 1 addition & 2 deletions profiler/inc/ProcDumpProfiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ class CorProfiler : public ICorProfilerCallback8
pthread_mutex_t endDumpCondition;
enum TriggerType triggerType;
int currentThresholdIndex;
bool gen2Collection;
int gcGeneration;
bool gcGenStarted;

Expand All @@ -129,7 +128,7 @@ class CorProfiler : public ICorProfilerCallback8
int send_all(int socket, void* buffer, size_t length);
int recv_all(int socket, void* buffer, size_t length);
bool WildcardSearch(WCHAR*, WCHAR*);
u_int64_t GetGCHeapSize();
u_int64_t GetGCHeapSize(int generation);
bool WriteDumpHelper(std::string dumpName);
bool IsHighPerfBasicGC();

Expand Down
96 changes: 68 additions & 28 deletions profiler/src/ProcDumpProfiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ bool CorProfiler::IsHighPerfBasicGC()
//------------------------------------------------------------------------------------------------------------------------------------------------------
CorProfiler::CorProfiler() :
refCount(0), corProfilerInfo8(nullptr), corProfilerInfo3(nullptr), corProfilerInfo(nullptr),
procDumpPid(0), currentThresholdIndex(0), gen2Collection(false), gcGeneration(-1), gcGenStarted(false)
procDumpPid(0), currentThresholdIndex(0), gcGeneration(-1), gcGenStarted(false)
{
// Configure logging
el::Loggers::reconfigureAllLoggers(el::ConfigurationType::Filename, LOG_FILE);
Expand Down Expand Up @@ -346,13 +346,13 @@ WCHAR* CorProfiler::GetUint16(char* buffer)
// Syntax of client data: <trigger_type><fullpathtodumplocation>;...
//
// DOTNET_EXCEPTION_TRIGGER;<fullpathtodumplocation>;<pidofprocdump>;<exception>:<numdumps>;<exception>:<numdumps>,...
// DOTNET_GC_THRESHOLD_TRIGGER;<fullpathtodumplocation>;<pidofprocdump>;Threshold1;Threshold2,...
// DOTNET_GC_THRESHOLD_TRIGGER;<fullpathtodumplocation>;<pidofprocdump>;[<generation> | 3(LOH) | 4(POH) | 2008 (total mem)];Threshold1;Threshold2,...
// DOTNET_GC_GEN_TRIGGER;<fullpathtodumplocation>;<pidofprocdump>;GCGeneration
//
//------------------------------------------------------------------------------------------------------------------------------------------------------
bool CorProfiler::ParseClientData(char* clientData)
{
LOG(TRACE) << "CorProfiler::ParseClientData: Enter";
LOG(TRACE) << "CorProfiler::ParseClientData: Enter clientData = " << clientData;
std::stringstream clientDataStream(clientData);
std::string segment;
std::vector<std::string> dataList;
Expand Down Expand Up @@ -415,7 +415,16 @@ bool CorProfiler::ParseClientData(char* clientData)
else if (triggerType == GCThreshold)
{
// GC threshold list
gcMemoryThresholdMonitorList.push_back(std::stoi(dataItem) << 20);
if(i == 3)
{
// first element is the generation
gcGeneration = std::stoi(dataItem);
i++;
}
else
{
gcMemoryThresholdMonitorList.push_back(std::stoi(dataItem) << 20);
}
}
else if (triggerType == GCGeneration)
{
Expand All @@ -442,7 +451,14 @@ bool CorProfiler::ParseClientData(char* clientData)

if(gcGeneration != -1)
{
LOG(TRACE) << "CorProfiler::ParseClientData:GCGeneration " << gcGeneration;
if( gcGeneration == CUMULATIVE_GC_SIZE)
{
LOG(TRACE) << "CorProfiler::ParseClientData:GCGeneration " << "Cumulative";
}
else
{
LOG(TRACE) << "CorProfiler::ParseClientData:GCGeneration " << gcGeneration;
}
}

LOG(TRACE) << "CorProfiler::ParseClientData: Exit";
Expand Down Expand Up @@ -720,7 +736,7 @@ std::string CorProfiler::GetDumpName(u_int16_t dumpCount, std::string name)
//------------------------------------------------------------------------------------------------------------------------------------------------------
// CorProfiler::GetGCHeapSize
//------------------------------------------------------------------------------------------------------------------------------------------------------
uint64_t CorProfiler::GetGCHeapSize()
uint64_t CorProfiler::GetGCHeapSize(int generation)
{
LOG(TRACE) << "CorProfiler::GetGCHeapSize: Enter";
uint64_t gcHeapSize = 0;
Expand Down Expand Up @@ -766,7 +782,17 @@ uint64_t CorProfiler::GetGCHeapSize()

for (int i = nObjectRanges - 1; i >= 0; i--)
{
gcHeapSize += pObjectRanges[i].rangeLength;
// Uncomment this to help track down .NET memory usage while debugging.
//LOG(TRACE) << "Range Len: " << pObjectRanges[i].rangeLength << " Gen: " << pObjectRanges[i].generation;

if(generation == CUMULATIVE_GC_SIZE)
{
gcHeapSize += pObjectRanges[i].rangeLength;
}
else if(pObjectRanges[i].generation == generation)
{
gcHeapSize += pObjectRanges[i].rangeLength;
}
}

if(fHeapAlloc == true)
Expand Down Expand Up @@ -827,27 +853,29 @@ HRESULT STDMETHODCALLTYPE CorProfiler::GarbageCollectionStarted(int cGenerations
{
LOG(TRACE) << "CorProfiler::GarbageCollectionStarted: Enter";

if(gcMemoryThresholdMonitorList.size() > 0 && generationCollected[2] == true)
if(gcGeneration != -1 &&
gcGenStarted == false &&
gcGeneration == CUMULATIVE_GC_SIZE ||
(gcGeneration < cGenerations &&
generationCollected[gcGeneration] == true))
{
// GC memory threshold dump
gen2Collection = true;
}
else if(gcGeneration != -1 && gcGenStarted == false && gcGeneration < cGenerations && generationCollected[gcGeneration] == true)
{
// GC Generation dump
LOG(TRACE) << "CorProfiler::GarbageCollectionStarted: Dump on generation: " << gcGeneration << " and cGenerations = " << cGenerations;
std::string dump = GetDumpName(1, convertString<std::string,std::wstring>(L"gc_gen"));
if(WriteDumpHelper(dump) == false)
gcGenStarted = true;

if(gcMemoryThresholdMonitorList.size() == 0)
{
SendCatastrophicFailureStatus();
return E_FAIL;
// GC Generation dump
LOG(TRACE) << "CorProfiler::GarbageCollectionStarted: Dump on generation: " << gcGeneration << " and cGenerations = " << cGenerations;
std::string dump = GetDumpName(1, convertString<std::string,std::wstring>(L"gc_gen"));
if(WriteDumpHelper(dump) == false)
{
SendCatastrophicFailureStatus();
return E_FAIL;
}
}

gcGenStarted = true;
}
else
{
LOG(TRACE) << "CorProfiler::GarbageCollectionStarted: Invalid trigger data, trigger = " << triggerType << " cGenerations " << cGenerations;
LOG(TRACE) << "CorProfiler::GarbageCollectionStarted: Trigger = " << triggerType << " cGenerations " << cGenerations << " gcGeneration " << gcGeneration << " threshold size " << gcMemoryThresholdMonitorList.size();
}

LOG(TRACE) << "CorProfiler::GarbageCollectionStarted: Exit";
Expand All @@ -861,12 +889,24 @@ HRESULT STDMETHODCALLTYPE CorProfiler::GarbageCollectionFinished()
{
LOG(TRACE) << "CorProfiler::GarbageCollectionFinished: Enter";

if(gen2Collection == true)
if(gcGenStarted == true && gcMemoryThresholdMonitorList.size() > 0)
{
// During a GC threshold trigger, we only want to check heap sizes and thresholds after a gen2 collection
gen2Collection = false;
uint64_t heapSize = GetGCHeapSize();
LOG(TRACE) << "CorProfiler::GarbageCollectionFinished: Total heap size " << heapSize;
// During a GC threshold trigger, we only want to check heap sizes and thresholds after the gen collection
uint64_t heapSize = 0;
gcGenStarted = false;

if(gcGeneration == CUMULATIVE_GC_SIZE)
{
// If a generation was not explicitly specified on the command line (for example: -gcm 10,20,30) we want to get _all_ the memory of
// the managed heap and hence we add up all generations, LOH and POH.
heapSize += GetGCHeapSize(CUMULATIVE_GC_SIZE);
LOG(TRACE) << "CorProfiler::GarbageCollectionFinished: Cumulative heap size " << heapSize;
}
else
{
heapSize = GetGCHeapSize(gcGeneration);
LOG(TRACE) << "CorProfiler::GarbageCollectionFinished: Generation " << gcGeneration << " heap size " << heapSize;
}

if(currentThresholdIndex < gcMemoryThresholdMonitorList.size() && heapSize >= gcMemoryThresholdMonitorList[currentThresholdIndex])
{
Expand Down Expand Up @@ -894,7 +934,7 @@ HRESULT STDMETHODCALLTYPE CorProfiler::GarbageCollectionFinished()
}
}
}
else if(gcGenStarted == true)
else if(gcGenStarted == true && gcMemoryThresholdMonitorList.size() == 0)
{
// GC Generation dump
LOG(TRACE) << "CorProfiler::GarbageCollectionFinished: Dump on generation: " << gcGeneration;
Expand Down
55 changes: 20 additions & 35 deletions src/Monitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -1381,7 +1381,7 @@ char* GetClientData(struct ProcDumpConfiguration *self, char* fullDumpPath)
return NULL;
}

clientData = GetClientDataString(Exception, fullDumpPath, exceptionFilter);
clientData = GetClientDataHelper(Exception, fullDumpPath, "%s", exceptionFilter);
if(clientData == NULL)
{
Trace("GetClientData: Failed to get client data (-e).");
Expand All @@ -1390,25 +1390,25 @@ char* GetClientData(struct ProcDumpConfiguration *self, char* fullDumpPath)
}
else if (self->bMonitoringGCMemory)
{
// GC Memory trigger (-gcm);<fullpathtodumplocation>;<pidofprocdump>;Threshold1;Threshold2,...
// GC Memory trigger (-gcm);<fullpathtodumplocation>;<pidofprocdump>;Generation;Threshold1;Threshold2,...
thresholds = GetThresholds(self);
if(thresholds == NULL)
{
Trace("GetClientData: Failed to get thresholds.");
return NULL;
}

clientData = GetClientDataString(GCThreshold, fullDumpPath, thresholds);
clientData = GetClientDataHelper(GCThreshold, fullDumpPath, "%d;%s", self->DumpGCGeneration == -1 ? CUMULATIVE_GC_SIZE : self->DumpGCGeneration, thresholds);
if(clientData == NULL)
{
Trace("GetClientData: Failed to get client data (-gcm).");
return NULL;
}
}
else if(self->DumpGCGeneration)
else if(self->DumpGCGeneration != -1 && self->MemoryThreshold == NULL)
{
// GC Generation (-gcgen);<fullpathtodumplocation>;<pidofprocdump>;GCGeneration
clientData = GetClientDataInt(GCGeneration, fullDumpPath, self->DumpGCGeneration);
clientData = GetClientDataHelper(GCGeneration, fullDumpPath, "%d", self->DumpGCGeneration);
if(clientData == NULL)
{
Trace("GetClientData: Failed to get client data (-gcgen).");
Expand All @@ -1427,51 +1427,36 @@ char* GetClientData(struct ProcDumpConfiguration *self, char* fullDumpPath)

//-------------------------------------------------------------------------------------
//
// GetClientDataHelper
//
// Builds the client data string "<triggerType>;<path>;<pid>;<payload>", where
// <pid> is the result of getpid() and <payload> is produced from the printf-style
// 'format' and the variadic arguments that follow it.
//
// Returns a null-terminated buffer allocated with malloc (ownership passes to the
// caller), or NULL if formatting or allocation fails.
//
//-------------------------------------------------------------------------------------
char* GetClientDataHelper(enum TriggerType triggerType, char* path, const char* format, ...)
{
    int prefixLen = 0;
    int payloadLen = 0;
    size_t clientDataSize = 0;
    char* clientData = NULL;
    va_list args;
    va_list args_copy;

    // Measure the fixed "<triggerType>;<path>;<pid>;" prefix.
    prefixLen = snprintf(NULL, 0, "%d;%s;%d;", triggerType, path, getpid());
    if(prefixLen < 0)
    {
        Trace("GetClientDataHelper: Failed to format client data prefix.");
        return NULL;
    }

    va_start(args, format);

    // Measure the payload on a copy; 'args' itself is consumed by the real
    // formatting pass below and must not be reused after a v*printf call.
    va_copy(args_copy, args);
    payloadLen = vsnprintf(NULL, 0, format, args_copy);
    va_end(args_copy);
    if(payloadLen < 0)
    {
        va_end(args);
        Trace("GetClientDataHelper: Failed to format client data.");
        return NULL;
    }

    clientDataSize = (size_t)prefixLen + (size_t)payloadLen + 1;
    clientData = malloc(clientDataSize);
    if(clientData == NULL)
    {
        // Every va_start must be paired with va_end before returning.
        va_end(args);
        Trace("GetClientDataHelper: Failed to allocate memory for client data.");
        return NULL;
    }

    // Bounded writes: the buffer was sized from the two measurements above.
    snprintf(clientData, clientDataSize, "%d;%s;%d;", triggerType, path, getpid());
    vsnprintf(clientData + prefixLen, clientDataSize - (size_t)prefixLen, format, args);

    va_end(args);
    return clientData;
}

Expand Down
Loading

0 comments on commit bb67c58

Please sign in to comment.