diff --git a/src/DurableTask.Netherite.AzureFunctions/NetheriteProvider.cs b/src/DurableTask.Netherite.AzureFunctions/NetheriteProvider.cs index c26ed097..db4a264e 100644 --- a/src/DurableTask.Netherite.AzureFunctions/NetheriteProvider.cs +++ b/src/DurableTask.Netherite.AzureFunctions/NetheriteProvider.cs @@ -117,6 +117,7 @@ public override bool TryGetScaleMonitor( if (this.Service.TryGetScalingMonitor(out var monitor)) { scaleMonitor = new ScaleMonitor(monitor); + monitor.InformationTracer($"ScaleMonitor Constructed Microsoft.Azure.WebJobs.Host.Scale.IScaleMonitor {scaleMonitor.Descriptor}"); return true; } else @@ -158,6 +159,7 @@ public async Task GetMetricsAsync() var cached = cachedMetrics; if (cached != null && DateTime.UtcNow - cached.Item1 < TimeSpan.FromSeconds(1.5)) { + this.scalingMonitor.InformationTracer?.Invoke($"ScaleMonitor returned metrics cached previously, at {cached.Item2.Timestamp:o}"); return cached.Item2; } @@ -174,13 +176,12 @@ public async Task GetMetricsAsync() this.serializer.WriteObject(stream, collectedMetrics); metrics.Metrics = stream.ToArray(); - this.scalingMonitor.Logger.LogInformation( - "Collected scale info for {partitionCount} partitions at {time:o} in {latencyMs:F2}ms.", - collectedMetrics.LoadInformation.Count, collectedMetrics.Timestamp, sw.Elapsed.TotalMilliseconds); + this.scalingMonitor.InformationTracer?.Invoke( + $"ScaleMonitor collected metrics for {collectedMetrics.LoadInformation.Count} partitions at {collectedMetrics.Timestamp:o} in {sw.Elapsed.TotalMilliseconds:F2}ms."); } - catch (Exception e) when (!Utils.IsFatal(e)) + catch (Exception e) { - this.scalingMonitor.Logger.LogError("IScaleMonitor.GetMetricsAsync() failed: {exception}", e); + this.scalingMonitor.ErrorTracer?.Invoke("ScaleMonitor failed to collect metrics", e); } cachedMetrics = new Tuple(DateTime.UtcNow, metrics); @@ -212,12 +213,14 @@ ScaleStatus GetScaleStatusCore(int workerCount, NetheriteScaleMetrics[] metrics) var collectedMetrics = (ScalingMonitor.Metrics) this.serializer.ReadObject(stream); recommendation = this.scalingMonitor.GetScaleRecommendation(workerCount, collectedMetrics); } - } + } catch (Exception e) when (!Utils.IsFatal(e)) { - this.scalingMonitor.Logger.LogError("IScaleMonitor.GetScaleStatus() failed: {exception}", e); + this.scalingMonitor.ErrorTracer?.Invoke("ScaleMonitor failed to compute scale recommendation", e); recommendation = new ScaleRecommendation(ScaleAction.None, keepWorkersAlive: true, reason: "unexpected error"); } + + this.scalingMonitor.RecommendationTracer?.Invoke(recommendation.Action.ToString(), workerCount, recommendation.Reason); ScaleStatus scaleStatus = new ScaleStatus(); switch (recommendation?.Action) diff --git a/src/DurableTask.Netherite/OrchestrationService/NetheriteOrchestrationService.cs b/src/DurableTask.Netherite/OrchestrationService/NetheriteOrchestrationService.cs index 44bdfd61..f1494929 100644 --- a/src/DurableTask.Netherite/OrchestrationService/NetheriteOrchestrationService.cs +++ b/src/DurableTask.Netherite/OrchestrationService/NetheriteOrchestrationService.cs @@ -170,20 +170,26 @@ public bool TryGetScalingMonitor(out ScalingMonitor monitor) if (this.configuredStorage == TransportConnectionString.StorageChoices.Faster && this.configuredTransport == TransportConnectionString.TransportChoices.EventHubs) { - monitor = new ScalingMonitor( - this.Settings.ResolvedStorageConnectionString, - this.Settings.ResolvedTransportConnectionString, - this.Settings.LoadInformationAzureTableName, - this.Settings.HubName, - this.TraceHelper.TraceScaleRecommendation, - this.TraceHelper.Logger); - return true; - } - else - { - monitor = null; - return false; + try + { + monitor = new ScalingMonitor( + this.Settings.ResolvedStorageConnectionString, + this.Settings.ResolvedTransportConnectionString, + this.Settings.LoadInformationAzureTableName, + this.Settings.HubName, + this.TraceHelper.TraceScaleRecommendation, + this.TraceHelper.TraceProgress, + this.TraceHelper.TraceError); + return true; + } + catch (Exception e) + { + this.TraceHelper.TraceError("Failed to construct ScalingMonitor", e); + } } + + monitor = null; + return false; } diff --git a/src/DurableTask.Netherite/Scaling/ScalingMonitor.cs b/src/DurableTask.Netherite/Scaling/ScalingMonitor.cs index 0254ac9f..be65fbc3 100644 --- a/src/DurableTask.Netherite/Scaling/ScalingMonitor.cs +++ b/src/DurableTask.Netherite/Scaling/ScalingMonitor.cs @@ -12,19 +12,22 @@ namespace DurableTask.Netherite.Scaling using System.Threading; using System.Threading.Tasks; using DurableTask.Netherite.EventHubs; - using Microsoft.Extensions.Logging; /// /// Monitors the performance of the Netherite backend and makes scaling decisions. /// - public class ScalingMonitor + public class ScalingMonitor { readonly string storageConnectionString; readonly string eventHubsConnectionString; readonly string partitionLoadTableName; readonly string taskHubName; readonly TransportConnectionString.TransportChoices configuredTransport; - readonly Action recommendationTracer; + + // public logging actions to enable collection of scale-monitor-related logging within the Netherite infrastructure + public Action RecommendationTracer { get; } + public Action InformationTracer { get; } + public Action ErrorTracer { get; } readonly ILoadMonitorService loadMonitor; @@ -33,11 +36,6 @@ public class ScalingMonitor /// public string TaskHubName => this.taskHubName; - /// - /// A logger for scaling events. - /// - public ILogger Logger { get; } - /// /// Creates an instance of the scaling monitor, with the given parameters. /// @@ -46,19 +44,22 @@ public class ScalingMonitor /// The name of the storage table with the partition load information. /// The name of the taskhub. public ScalingMonitor( - string storageConnectionString, - string eventHubsConnectionString, - string partitionLoadTableName, + string storageConnectionString, + string eventHubsConnectionString, + string partitionLoadTableName, string taskHubName, Action recommendationTracer, - ILogger logger) + Action informationTracer, + Action errorTracer) { + this.RecommendationTracer = recommendationTracer; + this.InformationTracer = informationTracer; + this.ErrorTracer = errorTracer; + this.storageConnectionString = storageConnectionString; this.eventHubsConnectionString = eventHubsConnectionString; this.partitionLoadTableName = partitionLoadTableName; this.taskHubName = taskHubName; - this.recommendationTracer = recommendationTracer; - this.Logger = logger; TransportConnectionString.Parse(eventHubsConnectionString, out _, out this.configuredTransport); @@ -129,8 +130,6 @@ public ScaleRecommendation GetScaleRecommendation(int workerCount, Metrics metri { var recommendation = DetermineRecommendation(); - this.recommendationTracer?.Invoke(recommendation.Action.ToString(), workerCount, recommendation.Reason); - return recommendation; ScaleRecommendation DetermineRecommendation()