From 7e30d2d96fd5cfd8d26a2514aa549804b8eec197 Mon Sep 17 00:00:00 2001 From: Russ Cam Date: Thu, 14 Jan 2021 09:47:03 +1000 Subject: [PATCH] Ensure ETW session is deleted when GcMetricsProvider is disposed (#1124) This commit fixes a bug in collecting GC Metrics on .NET Framework where the underlying ETW session created by TraceEventSession was not stopped and deleted when GcMetricsProvider was disposed. The issue arises due to the using around _traceEventSession in the GcMetricsProvider's ctor, and the Task.Run() inside it, which leaves the session in a running state that can't be deleted on disposing the GcMetricsProvider. An internal TraceEventSession name property exposes the name of the session to allow tests to assert that the session is deleted on dispose. Fixes #897 --- .../MetricsProvider/GcMetricsProvider.cs | 140 ++++++++++-------- test/Elastic.Apm.Tests/MetricsTests.cs | 27 +++- 2 files changed, 96 insertions(+), 71 deletions(-) diff --git a/src/Elastic.Apm/Metrics/MetricsProvider/GcMetricsProvider.cs b/src/Elastic.Apm/Metrics/MetricsProvider/GcMetricsProvider.cs index cb56f3bbe..42df210eb 100644 --- a/src/Elastic.Apm/Metrics/MetricsProvider/GcMetricsProvider.cs +++ b/src/Elastic.Apm/Metrics/MetricsProvider/GcMetricsProvider.cs @@ -12,6 +12,7 @@ using Elastic.Apm.Logging; using Microsoft.Diagnostics.Tracing; using Microsoft.Diagnostics.Tracing.Parsers; +using Microsoft.Diagnostics.Tracing.Parsers.Clr; using Microsoft.Diagnostics.Tracing.Session; // ReSharper disable AccessToDisposedClosure @@ -25,15 +26,14 @@ namespace Elastic.Apm.Metrics.MetricsProvider /// internal class GcMetricsProvider : IMetricsProvider, IDisposable { + private const string SessionNamePrefix = "EtwSessionForCLRElasticApm_"; + internal const string GcCountName = "clr.gc.count"; internal const string GcGen0SizeName = "clr.gc.gen0size"; internal const string GcGen1SizeName = "clr.gc.gen1size"; internal const string GcGen2SizeName = "clr.gc.gen2size"; internal const string GcGen3SizeName = "clr.gc.gen3size"; - - private const string SessionNamePrefix = "EtwSessionForCLRElasticApm_"; - private readonly bool _collectGcCount; private readonly bool _collectGcGen0Size; private readonly bool _collectGcGen1Size; @@ -41,12 +41,18 @@ internal class GcMetricsProvider : IMetricsProvider, IDisposable private readonly bool _collectGcGen3Size; private readonly GcEventListener _eventListener; - private readonly object _lock = new object(); - private readonly IApmLogger _logger; - private readonly TraceEventSession _traceEventSession; + private readonly Task _traceEventSessionTask; + private readonly int _currentProcessId; + + private volatile bool _isMetricAlreadyCaptured; + private uint _gcCount; + private ulong _gen0Size; + private ulong _gen1Size; + private ulong _gen2Size; + private ulong _gen3Size; public GcMetricsProvider(IApmLogger logger, bool collectGcCount = true, bool collectGcGen0Size = true, bool collectGcGen1Size = true, bool collectGcGen2Size = true, bool collectGcGen3Size = true @@ -61,71 +67,35 @@ public GcMetricsProvider(IApmLogger logger, bool collectGcCount = true, bool col _logger = logger.Scoped(nameof(SystemTotalCpuProvider)); if (PlatformDetection.IsDotNetFullFramework) { - var sessionName = SessionNamePrefix + Guid.NewGuid(); - using (_traceEventSession = new TraceEventSession(sessionName)) + TraceEventSessionName = SessionNamePrefix + Guid.NewGuid(); + _traceEventSession = new TraceEventSession(TraceEventSessionName); + _currentProcessId = Process.GetCurrentProcess().Id; + _traceEventSessionTask = Task.Run(() => { - Task.Run(() => + try { - try - { - _traceEventSession.EnableProvider( - ClrTraceEventParser.ProviderGuid, - TraceEventLevel.Informational, - (ulong) - ClrTraceEventParser.Keywords.GC // garbage collector details - ); - } - catch (Exception e) - { - _logger.Warning()?.LogException(e, "TraceEventSession initialization failed - GC metrics won't be collected"); - return; - } - - _traceEventSession.Source.Clr.GCStop += (a) => - { - if (a.ProcessID == Process.GetCurrentProcess().Id) - { - if (!_isMetricAlreadyCaptured) - { - lock (_lock) - _isMetricAlreadyCaptured = true; - } - _gcCount = (uint)a.Count; - } - }; - - _traceEventSession.Source.Clr.GCHeapStats += (a) => - { - if (a.ProcessID == Process.GetCurrentProcess().Id) - { - if (!_isMetricAlreadyCaptured) - { - lock (_lock) - _isMetricAlreadyCaptured = true; - } - _gen0Size = (ulong)a.GenerationSize0; - _gen1Size = (ulong)a.GenerationSize1; - _gen2Size = (ulong)a.GenerationSize2; - _gen3Size = (ulong)a.GenerationSize3; - } - }; - - _traceEventSession.Source.Process(); - }); - } + _traceEventSession.EnableProvider( + ClrTraceEventParser.ProviderGuid, + TraceEventLevel.Informational, + (ulong)ClrTraceEventParser.Keywords.GC // garbage collector details + ); + } + catch (Exception e) + { + _logger.Warning()?.LogException(e, "TraceEventSession initialization failed - GC metrics won't be collected"); + return; + } + + _traceEventSession.Source.Clr.GCStop += ClrOnGCStop; + _traceEventSession.Source.Clr.GCHeapStats += ClrOnGCHeapStats; + _traceEventSession.Source.Process(); + }); } if (PlatformDetection.IsDotNetCore || PlatformDetection.IsDotNet5) _eventListener = new GcEventListener(this, logger); } - private uint _gcCount; - private ulong _gen0Size; - private ulong _gen1Size; - private ulong _gen2Size; - private ulong _gen3Size; - private volatile bool _isMetricAlreadyCaptured; - public int ConsecutiveNumberOfFailedReads { get; set; } public string DbgName => "GcMetricsProvider"; @@ -137,6 +107,12 @@ public bool IsMetricAlreadyCaptured } } + /// + /// The name of the TraceEventSession when using + /// to capture metrics, otherwise null. + /// + internal string TraceEventSessionName { get; } + public IEnumerable GetSamples() { if (_gcCount != 0 || _gen0Size != 0 || _gen2Size != 0 || _gen3Size != 0) @@ -166,9 +142,45 @@ public IEnumerable GetSamples() public void Dispose() { _eventListener?.Dispose(); - _traceEventSession?.Dispose(); + if (_traceEventSession != null) + { + _traceEventSession.Source.Clr.GCStop -= ClrOnGCStop; + _traceEventSession.Source.Clr.GCHeapStats -= ClrOnGCHeapStats; + _traceEventSession.Dispose(); + + if (_traceEventSessionTask.IsCompleted || _traceEventSessionTask.IsFaulted || _traceEventSessionTask.IsCanceled) + _traceEventSessionTask.Dispose(); + } } + private void ClrOnGCHeapStats(GCHeapStatsTraceData a) + { + if (a.ProcessID == _currentProcessId) + { + if (!_isMetricAlreadyCaptured) + { + lock (_lock) + _isMetricAlreadyCaptured = true; + } + _gen0Size = (ulong)a.GenerationSize0; + _gen1Size = (ulong)a.GenerationSize1; + _gen2Size = (ulong)a.GenerationSize2; + _gen3Size = (ulong)a.GenerationSize3; + } + } + + private void ClrOnGCStop(GCEndTraceData a) + { + if (a.ProcessID == _currentProcessId) + { + if (!_isMetricAlreadyCaptured) + { + lock (_lock) + _isMetricAlreadyCaptured = true; + } + _gcCount = (uint)a.Count; + } + } /// /// An event listener that collects the GC stats diff --git a/test/Elastic.Apm.Tests/MetricsTests.cs b/test/Elastic.Apm.Tests/MetricsTests.cs index 12c4b3b9c..3a6d903bc 100644 --- a/test/Elastic.Apm.Tests/MetricsTests.cs +++ b/test/Elastic.Apm.Tests/MetricsTests.cs @@ -18,14 +18,12 @@ using Elastic.Apm.Tests.Mocks; using Elastic.Apm.Tests.TestHelpers; using FluentAssertions; +using Microsoft.Diagnostics.Tracing.Session; using Moq; using Xunit; using Xunit.Abstractions; -#if !NETCOREAPP2_1 using Elastic.Apm.Helpers; -#endif - namespace Elastic.Apm.Tests { public class MetricsTests : LoggingTestBase @@ -33,8 +31,13 @@ public class MetricsTests : LoggingTestBase private const string ThisClassName = nameof(MetricsTests); private readonly IApmLogger _logger; + private ITestOutputHelper _output; - public MetricsTests(ITestOutputHelper xUnitOutputHelper) : base(xUnitOutputHelper) => _logger = LoggerBase.Scoped(ThisClassName); + public MetricsTests(ITestOutputHelper xUnitOutputHelper) : base(xUnitOutputHelper) + { + _output = xUnitOutputHelper; + _logger = LoggerBase.Scoped(ThisClassName); + } public static IEnumerable DisableProviderTestData { @@ -274,8 +277,10 @@ public void CollectAllMetrics_ShouldNotDisableProvider_WhenAnyValueIsSamplesIsVa public void CollectGcMetrics() { var logger = new TestLogger(LogLevel.Trace); + string traceEventSessionName; using (var gcMetricsProvider = new GcMetricsProvider(logger)) { + traceEventSessionName = gcMetricsProvider.TraceEventSessionName; gcMetricsProvider.IsMetricAlreadyCaptured.Should().BeFalse(); #if !NETCOREAPP2_1 @@ -304,7 +309,7 @@ public void CollectGcMetrics() var samples = gcMetricsProvider.GetSamples(); - containsValue = samples != null && samples.Count() != 0; + containsValue = samples != null && samples.Any(); if (containsValue) break; @@ -312,18 +317,20 @@ public void CollectGcMetrics() if (PlatformDetection.IsDotNetFullFramework) { - if (logger.Lines.Where(n => n.Contains("TraceEventSession initialization failed - GC metrics won't be collected")).Any()) + if (logger.Lines.Any(n => n.Contains("TraceEventSession initialization failed - GC metrics won't be collected"))) { // If initialization fails, (e.g. because ETW session initalization fails) we don't assert + _output.WriteLine("Initialization failed. don't make assertions"); return; } } if (PlatformDetection.IsDotNetCore || PlatformDetection.IsDotNet5) { - if (!logger.Lines.Where(n => n.Contains("OnEventWritten with GC")).Any()) + if (!logger.Lines.Any(n => n.Contains("OnEventWritten with GC"))) { // If no OnWritten with a GC event was called then initialization failed -> we don't assert + _output.WriteLine("Initialization failed. don't make assertions"); return; } } @@ -331,6 +338,12 @@ public void CollectGcMetrics() gcMetricsProvider.IsMetricAlreadyCaptured.Should().BeTrue(); #endif } + + if (PlatformDetection.IsDotNetFullFramework) + { + var traceEventSession = TraceEventSession.GetActiveSession(traceEventSessionName); + traceEventSession.Should().BeNull(); + } } internal class MetricsProviderWithException : IMetricsProvider