Skip to content

Commit

Permalink
Ensure ETW session is deleted when GcMetricsProvider is disposed
Browse files Browse the repository at this point in the history
This commit fixes a bug in collecting GC Metrics on
.NET Framework where the underlying ETW session created by
TraceEventSession was not stopped and deleted when
GcMetricsProvider was disposed. The issue arises due
to the using around _traceEventSession in the
GcMetricsProvider's ctor, and the Task.Run() inside it,
which leaves the session in a running state that can't
be deleted on disposing the GcMetricsProvider.

An internal TraceEventSession name property exposes
the name of the session to allow tests to assert that
the session is deleted on dispose.

Fixes elastic#897
  • Loading branch information
russcam committed Jan 13, 2021
1 parent cc900c4 commit 09dae80
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 71 deletions.
140 changes: 76 additions & 64 deletions src/Elastic.Apm/Metrics/MetricsProvider/GcMetricsProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
using Elastic.Apm.Logging;
using Microsoft.Diagnostics.Tracing;
using Microsoft.Diagnostics.Tracing.Parsers;
using Microsoft.Diagnostics.Tracing.Parsers.Clr;
using Microsoft.Diagnostics.Tracing.Session;

// ReSharper disable AccessToDisposedClosure
Expand All @@ -25,28 +26,33 @@ namespace Elastic.Apm.Metrics.MetricsProvider
/// </summary>
internal class GcMetricsProvider : IMetricsProvider, IDisposable
{
private const string SessionNamePrefix = "EtwSessionForCLRElasticApm_";

internal const string GcCountName = "clr.gc.count";
internal const string GcGen0SizeName = "clr.gc.gen0size";
internal const string GcGen1SizeName = "clr.gc.gen1size";
internal const string GcGen2SizeName = "clr.gc.gen2size";
internal const string GcGen3SizeName = "clr.gc.gen3size";


private const string SessionNamePrefix = "EtwSessionForCLRElasticApm_";

private readonly bool _collectGcCount;
private readonly bool _collectGcGen0Size;
private readonly bool _collectGcGen1Size;
private readonly bool _collectGcGen2Size;
private readonly bool _collectGcGen3Size;

private readonly GcEventListener _eventListener;

private readonly object _lock = new object();

private readonly IApmLogger _logger;

private readonly TraceEventSession _traceEventSession;
private readonly Task _traceEventSessionTask;
private readonly int _currentProcessId;

private volatile bool _isMetricAlreadyCaptured;
private uint _gcCount;
private ulong _gen0Size;
private ulong _gen1Size;
private ulong _gen2Size;
private ulong _gen3Size;

public GcMetricsProvider(IApmLogger logger, bool collectGcCount = true, bool collectGcGen0Size = true, bool collectGcGen1Size = true,
bool collectGcGen2Size = true, bool collectGcGen3Size = true
Expand All @@ -61,71 +67,35 @@ public GcMetricsProvider(IApmLogger logger, bool collectGcCount = true, bool col
_logger = logger.Scoped(nameof(SystemTotalCpuProvider));
if (PlatformDetection.IsDotNetFullFramework)
{
var sessionName = SessionNamePrefix + Guid.NewGuid();
using (_traceEventSession = new TraceEventSession(sessionName))
TraceEventSessionName = SessionNamePrefix + Guid.NewGuid();
_traceEventSession = new TraceEventSession(TraceEventSessionName);
_currentProcessId = Process.GetCurrentProcess().Id;
_traceEventSessionTask = Task.Run(() =>
{
Task.Run(() =>
try
{
try
{
_traceEventSession.EnableProvider(
ClrTraceEventParser.ProviderGuid,
TraceEventLevel.Informational,
(ulong)
ClrTraceEventParser.Keywords.GC // garbage collector details
);
}
catch (Exception e)
{
_logger.Warning()?.LogException(e, "TraceEventSession initialization failed - GC metrics won't be collected");
return;
}

_traceEventSession.Source.Clr.GCStop += (a) =>
{
if (a.ProcessID == Process.GetCurrentProcess().Id)
{
if (!_isMetricAlreadyCaptured)
{
lock (_lock)
_isMetricAlreadyCaptured = true;
}
_gcCount = (uint)a.Count;
}
};

_traceEventSession.Source.Clr.GCHeapStats += (a) =>
{
if (a.ProcessID == Process.GetCurrentProcess().Id)
{
if (!_isMetricAlreadyCaptured)
{
lock (_lock)
_isMetricAlreadyCaptured = true;
}
_gen0Size = (ulong)a.GenerationSize0;
_gen1Size = (ulong)a.GenerationSize1;
_gen2Size = (ulong)a.GenerationSize2;
_gen3Size = (ulong)a.GenerationSize3;
}
};

_traceEventSession.Source.Process();
});
}
_traceEventSession.EnableProvider(
ClrTraceEventParser.ProviderGuid,
TraceEventLevel.Informational,
(ulong)ClrTraceEventParser.Keywords.GC // garbage collector details
);
}
catch (Exception e)
{
_logger.Warning()?.LogException(e, "TraceEventSession initialization failed - GC metrics won't be collected");
return;
}

_traceEventSession.Source.Clr.GCStop += ClrOnGCStop;
_traceEventSession.Source.Clr.GCHeapStats += ClrOnGCHeapStats;
_traceEventSession.Source.Process();
});
}

if (PlatformDetection.IsDotNetCore || PlatformDetection.IsDotNet5)
_eventListener = new GcEventListener(this, logger);
}

private uint _gcCount;
private ulong _gen0Size;
private ulong _gen1Size;
private ulong _gen2Size;
private ulong _gen3Size;
private volatile bool _isMetricAlreadyCaptured;

public int ConsecutiveNumberOfFailedReads { get; set; }
public string DbgName => "GcMetricsProvider";

Expand All @@ -137,6 +107,12 @@ public bool IsMetricAlreadyCaptured
}
}

/// <summary>
/// The name of the TraceEventSession when using <see cref="TraceEventSession"/>
/// to capture metrics, otherwise null.
/// </summary>
internal string TraceEventSessionName { get; }

public IEnumerable<MetricSample> GetSamples()
{
if (_gcCount != 0 || _gen0Size != 0 || _gen2Size != 0 || _gen3Size != 0)
Expand Down Expand Up @@ -166,9 +142,45 @@ public IEnumerable<MetricSample> GetSamples()
public void Dispose()
{
_eventListener?.Dispose();
_traceEventSession?.Dispose();
if (_traceEventSession != null)
{
_traceEventSession.Source.Clr.GCStop -= ClrOnGCStop;
_traceEventSession.Source.Clr.GCHeapStats -= ClrOnGCHeapStats;
_traceEventSession.Dispose();

if (_traceEventSessionTask.IsCompleted || _traceEventSessionTask.IsFaulted || _traceEventSessionTask.IsCanceled)
_traceEventSessionTask.Dispose();
}
}

private void ClrOnGCHeapStats(GCHeapStatsTraceData a)
{
if (a.ProcessID == _currentProcessId)
{
if (!_isMetricAlreadyCaptured)
{
lock (_lock)
_isMetricAlreadyCaptured = true;
}
_gen0Size = (ulong)a.GenerationSize0;
_gen1Size = (ulong)a.GenerationSize1;
_gen2Size = (ulong)a.GenerationSize2;
_gen3Size = (ulong)a.GenerationSize3;
}
}

private void ClrOnGCStop(GCEndTraceData a)
{
if (a.ProcessID == _currentProcessId)
{
if (!_isMetricAlreadyCaptured)
{
lock (_lock)
_isMetricAlreadyCaptured = true;
}
_gcCount = (uint)a.Count;
}
}

/// <summary>
/// An event listener that collects the GC stats
Expand Down
27 changes: 20 additions & 7 deletions test/Elastic.Apm.Tests/MetricsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,26 @@
using Elastic.Apm.Tests.Mocks;
using Elastic.Apm.Tests.TestHelpers;
using FluentAssertions;
using Microsoft.Diagnostics.Tracing.Session;
using Moq;
using Xunit;
using Xunit.Abstractions;
#if !NETCOREAPP2_1
using Elastic.Apm.Helpers;

#endif

namespace Elastic.Apm.Tests
{
public class MetricsTests : LoggingTestBase
{
private const string ThisClassName = nameof(MetricsTests);

private readonly IApmLogger _logger;
private ITestOutputHelper _output;

public MetricsTests(ITestOutputHelper xUnitOutputHelper) : base(xUnitOutputHelper) => _logger = LoggerBase.Scoped(ThisClassName);
public MetricsTests(ITestOutputHelper xUnitOutputHelper) : base(xUnitOutputHelper)
{
_output = xUnitOutputHelper;
_logger = LoggerBase.Scoped(ThisClassName);
}

public static IEnumerable<object[]> DisableProviderTestData
{
Expand Down Expand Up @@ -274,8 +277,10 @@ public void CollectAllMetrics_ShouldNotDisableProvider_WhenAnyValueIsSamplesIsVa
public void CollectGcMetrics()
{
var logger = new TestLogger(LogLevel.Trace);
string traceEventSessionName;
using (var gcMetricsProvider = new GcMetricsProvider(logger))
{
traceEventSessionName = gcMetricsProvider.TraceEventSessionName;
gcMetricsProvider.IsMetricAlreadyCaptured.Should().BeFalse();

#if !NETCOREAPP2_1
Expand Down Expand Up @@ -304,33 +309,41 @@ public void CollectGcMetrics()

var samples = gcMetricsProvider.GetSamples();

containsValue = samples != null && samples.Count() != 0;
containsValue = samples != null && samples.Any();

if (containsValue)
break;
}

if (PlatformDetection.IsDotNetFullFramework)
{
if (logger.Lines.Where(n => n.Contains("TraceEventSession initialization failed - GC metrics won't be collected")).Any())
if (logger.Lines.Any(n => n.Contains("TraceEventSession initialization failed - GC metrics won't be collected")))
{
// If initialization fails, (e.g. because ETW session initalization fails) we don't assert
_output.WriteLine("Initialization failed. don't make assertions");
return;
}
}

if (PlatformDetection.IsDotNetCore || PlatformDetection.IsDotNet5)
{
if (!logger.Lines.Where(n => n.Contains("OnEventWritten with GC")).Any())
if (!logger.Lines.Any(n => n.Contains("OnEventWritten with GC")))
{
// If no OnWritten with a GC event was called then initialization failed -> we don't assert
_output.WriteLine("Initialization failed. don't make assertions");
return;
}
}
containsValue.Should().BeTrue();
gcMetricsProvider.IsMetricAlreadyCaptured.Should().BeTrue();
#endif
}

if (PlatformDetection.IsDotNetFullFramework)
{
var traceEventSession = TraceEventSession.GetActiveSession(traceEventSessionName);
traceEventSession.Should().BeNull();
}
}

internal class MetricsProviderWithException : IMetricsProvider
Expand Down

0 comments on commit 09dae80

Please sign in to comment.