diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/MetricsPurgeActivity.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/MetricsPurgeActivity.java deleted file mode 100644 index dfb003cc3..000000000 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/MetricsPurgeActivity.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - * - * Modifications Copyright OpenSearch Contributors. See - * GitHub history for details. - */ - -/* - * Copyright 2019-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. 
- */ - -package org.opensearch.performanceanalyzer.collectors; - - -import java.io.File; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.opensearch.performanceanalyzer.config.PluginSettings; -import org.opensearch.performanceanalyzer.metrics.MetricsConfiguration; -import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics; - -public class MetricsPurgeActivity extends PerformanceAnalyzerMetricsCollector { - private static final Logger LOG = LogManager.getLogger(MetricsPurgeActivity.class); - - public MetricsPurgeActivity() { - super( - MetricsConfiguration.CONFIG_MAP.get(MetricsPurgeActivity.class).samplingInterval, - "MetricsPurgeActivity"); - } - - private static int purgeInterval = - MetricsConfiguration.CONFIG_MAP.get(MetricsPurgeActivity.class).deletionInterval; - - @Override - public void collectMetrics(long startTime) { - deleteEventLogFiles(startTime); - } - - private void deleteEventLogFiles(long referenceTime) { - LOG.debug("Starting to delete old writer files"); - File root = new File(PluginSettings.instance().getMetricsLocation()); - String[] children = root.list(); - if (children == null) { - return; - } - int filesDeletedCount = 0; - for (String child : children) { - File fileToDelete = new File(root, child); - if (fileToDelete.lastModified() - < PerformanceAnalyzerMetrics.getTimeInterval(referenceTime - purgeInterval)) { - PerformanceAnalyzerMetrics.removeMetrics(fileToDelete); - filesDeletedCount += 1; - } - } - LOG.debug("'{}' Old writer files cleaned up.", filesDeletedCount); - } -} diff --git a/src/main/java/org/opensearch/performanceanalyzer/collectors/StatExceptionCode.java b/src/main/java/org/opensearch/performanceanalyzer/collectors/StatExceptionCode.java index 858cee7b0..c9b0e3a80 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/collectors/StatExceptionCode.java +++ b/src/main/java/org/opensearch/performanceanalyzer/collectors/StatExceptionCode.java @@ -28,16 
+28,19 @@ public enum StatExceptionCode { TOTAL_ERROR("TotalError"), - METRICS_WRITE_ERROR("MetricsWriteError"), - METRICS_REMOVE_ERROR("MetricsRemoveError"), + // Tracks the number of VM attach/dataDump or detach failures. JVM_ATTACH_ERROR("JvmAttachErrror"), + // This error is thrown if the java_pid file is missing. JVM_ATTACH_ERROR_JAVA_PID_FILE_MISSING("JvmAttachErrorJavaPidFileMissing"), + // The lock could not be acquired within the timeout. JVM_ATTACH_LOCK_ACQUISITION_FAILED("JvmAttachLockAcquisitionFailed"), + // ThreadState could not be found for an OpenSearch thread in the critical OpenSearch path. NO_THREAD_STATE_INFO("NoThreadStateInfo"), + // This metric indicates that we successfully completed a thread-dump. Likewise, // an omission of this should indicate that the thread taking the dump got stuck. JVM_THREAD_DUMP_SUCCESSFUL("JvmThreadDumpSuccessful"), diff --git a/src/main/java/org/opensearch/performanceanalyzer/metrics/MetricsConfiguration.java b/src/main/java/org/opensearch/performanceanalyzer/metrics/MetricsConfiguration.java index d7a7df38a..c7bc096df 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/metrics/MetricsConfiguration.java +++ b/src/main/java/org/opensearch/performanceanalyzer/metrics/MetricsConfiguration.java @@ -32,12 +32,10 @@ import org.opensearch.performanceanalyzer.collectors.DisksCollector; import org.opensearch.performanceanalyzer.collectors.GCInfoCollector; import org.opensearch.performanceanalyzer.collectors.HeapMetricsCollector; -import org.opensearch.performanceanalyzer.collectors.MetricsPurgeActivity; import org.opensearch.performanceanalyzer.collectors.MountedPartitionMetricsCollector; import org.opensearch.performanceanalyzer.collectors.NetworkE2ECollector; import org.opensearch.performanceanalyzer.collectors.NetworkInterfaceCollector; import org.opensearch.performanceanalyzer.collectors.StatsCollector; -import org.opensearch.performanceanalyzer.config.PluginSettings; import 
org.opensearch.performanceanalyzer.jvm.GCMetrics; import org.opensearch.performanceanalyzer.jvm.HeapMetrics; import org.opensearch.performanceanalyzer.jvm.ThreadList; @@ -50,18 +48,14 @@ public class MetricsConfiguration { public static final int SAMPLING_INTERVAL = 5000; public static final int ROTATION_INTERVAL = 30000; public static final int STATS_ROTATION_INTERVAL = 60000; - public static final int DELETION_INTERVAL = - PluginSettings.instance().getMetricsDeletionInterval(); public static class MetricConfig { public int samplingInterval; public int rotationInterval; - public int deletionInterval; - public MetricConfig(int samplingInterval, int rotationInterval, int deletionInterval) { + public MetricConfig(int samplingInterval, int rotationInterval) { this.samplingInterval = samplingInterval; this.rotationInterval = rotationInterval; - this.deletionInterval = deletionInterval; } } @@ -69,7 +63,7 @@ public MetricConfig(int samplingInterval, int rotationInterval, int deletionInte public static final MetricConfig cdefault; static { - cdefault = new MetricConfig(SAMPLING_INTERVAL, 0, 0); + cdefault = new MetricConfig(SAMPLING_INTERVAL, 0); CONFIG_MAP.put(ThreadCPU.class, cdefault); CONFIG_MAP.put(ThreadDiskIO.class, cdefault); @@ -80,11 +74,8 @@ public MetricConfig(int samplingInterval, int rotationInterval, int deletionInte CONFIG_MAP.put(NetworkE2ECollector.class, cdefault); CONFIG_MAP.put(NetworkInterfaceCollector.class, cdefault); CONFIG_MAP.put(OSGlobals.class, cdefault); - CONFIG_MAP.put(PerformanceAnalyzerMetrics.class, new MetricConfig(0, ROTATION_INTERVAL, 0)); - CONFIG_MAP.put( - MetricsPurgeActivity.class, - new MetricConfig(ROTATION_INTERVAL, 0, DELETION_INTERVAL)); - CONFIG_MAP.put(StatsCollector.class, new MetricConfig(STATS_ROTATION_INTERVAL, 0, 0)); + CONFIG_MAP.put(PerformanceAnalyzerMetrics.class, new MetricConfig(0, ROTATION_INTERVAL)); + CONFIG_MAP.put(StatsCollector.class, new MetricConfig(STATS_ROTATION_INTERVAL, 0)); 
CONFIG_MAP.put(DisksCollector.class, cdefault); CONFIG_MAP.put(HeapMetricsCollector.class, cdefault); CONFIG_MAP.put(GCInfoCollector.class, cdefault); diff --git a/src/main/java/org/opensearch/performanceanalyzer/metrics/PerformanceAnalyzerMetrics.java b/src/main/java/org/opensearch/performanceanalyzer/metrics/PerformanceAnalyzerMetrics.java index d106de875..585842131 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/metrics/PerformanceAnalyzerMetrics.java +++ b/src/main/java/org/opensearch/performanceanalyzer/metrics/PerformanceAnalyzerMetrics.java @@ -33,13 +33,13 @@ import java.nio.file.Paths; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; +import java.util.function.Supplier; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; -import org.apache.logging.log4j.util.Supplier; -import org.opensearch.performanceanalyzer.collectors.StatExceptionCode; -import org.opensearch.performanceanalyzer.collectors.StatsCollector; +import org.opensearch.performanceanalyzer.PerformanceAnalyzerApp; import org.opensearch.performanceanalyzer.config.PluginSettings; +import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics; import org.opensearch.performanceanalyzer.reader_writer_shared.Event; @SuppressWarnings("checkstyle:constantname") @@ -139,7 +139,8 @@ public static void addMetricEntry(StringBuilder value, String metricKey, long me private static void emitMetric(BlockingQueue q, Event entry) { if (!q.offer(entry)) { - // TODO: Emit a metric here. 
+            PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat(
+                    WriterMetrics.METRICS_WRITE_ERROR, entry.key, 1);
             LOG.debug("Could not enter metric {}", entry);
         }
     }
@@ -199,7 +200,10 @@ public static void removeMetrics(File keyPathFile) {
                 LOG.debug("Purge Could not delete file {}", keyPathFile);
             }
         } catch (Exception ex) {
-            StatsCollector.instance().logException(StatExceptionCode.METRICS_REMOVE_ERROR);
+            PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat(
+                    WriterMetrics.METRICS_REMOVE_ERROR, "", 1);
             LOG.debug(
-                    (Supplier<?>)
+                    // Must be log4j's Supplier: a java.util.function.Supplier would bind to
+                    // debug(Object, Throwable) and log the lambda object, not the message.
+                    (org.apache.logging.log4j.util.Supplier<?>)
                             () ->
@@ -207,7 +211,7 @@ public static void removeMetrics(File keyPathFile) {
                                             "Error in deleting file: {} for keyPath:{} with ExceptionCode: {}",
                                             ex.toString(),
                                             keyPathFile.getAbsolutePath(),
-                                            StatExceptionCode.METRICS_REMOVE_ERROR.toString()),
+                                            WriterMetrics.METRICS_REMOVE_ERROR.toString()),
                     ex);
         }
     }
diff --git a/src/main/java/org/opensearch/performanceanalyzer/rca/framework/metrics/WriterMetrics.java b/src/main/java/org/opensearch/performanceanalyzer/rca/framework/metrics/WriterMetrics.java
index cb3eea786..b730a27a7 100644
--- a/src/main/java/org/opensearch/performanceanalyzer/rca/framework/metrics/WriterMetrics.java
+++ b/src/main/java/org/opensearch/performanceanalyzer/rca/framework/metrics/WriterMetrics.java
@@ -34,6 +34,15 @@
 import org.opensearch.performanceanalyzer.rca.stats.measurements.MeasurementSet;
 
 public enum WriterMetrics implements MeasurementSet {
+    /** Measures the time spent in deleting the event log files */
+    EVENT_LOG_FILES_DELETION_TIME(
+            "EventLogFilesDeletionTime",
+            "millis",
+            Arrays.asList(Statistics.MAX, Statistics.MEAN, Statistics.SUM)),
+    /** Measures the count of event log files deleted */
+    EVENT_LOG_FILES_DELETED(
+            "EventLogFilesDeleted", "count", Arrays.asList(Statistics.MAX, Statistics.SUM)),
+
     SHARD_STATE_COLLECTOR_EXECUTION_TIME(
             "ShardStateCollectorExecutionTime",
             "millis",
@@ -145,6 +154,15 @@ public enum WriterMetrics implements MeasurementSet {
                     Statistics.SUM)),
     STALE_METRICS("StaleMetrics",
"count", Arrays.asList(Statistics.COUNT)), + + METRICS_WRITE_ERROR( + "MetricsWriteError", + "namedCount", + Collections.singletonList(Statistics.NAMED_COUNTERS)), + + METRICS_REMOVE_ERROR("MetricsRemoveError", "count", Arrays.asList(Statistics.COUNT)), + + METRICS_REMOVE_FAILURE("MetricsRemoveFailure", "count", Arrays.asList(Statistics.COUNT)), ; /** What we want to appear as the metric name. */ diff --git a/src/main/java/org/opensearch/performanceanalyzer/reader_writer_shared/EventLogFileHandler.java b/src/main/java/org/opensearch/performanceanalyzer/reader_writer_shared/EventLogFileHandler.java index b83285b74..851f09a6a 100644 --- a/src/main/java/org/opensearch/performanceanalyzer/reader_writer_shared/EventLogFileHandler.java +++ b/src/main/java/org/opensearch/performanceanalyzer/reader_writer_shared/EventLogFileHandler.java @@ -39,10 +39,14 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; +import java.util.Arrays; import java.util.List; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.performanceanalyzer.PerformanceAnalyzerApp; import org.opensearch.performanceanalyzer.core.Util; +import org.opensearch.performanceanalyzer.metrics.PerformanceAnalyzerMetrics; +import org.opensearch.performanceanalyzer.rca.framework.metrics.WriterMetrics; import org.opensearch.performanceanalyzer.reader.EventDispatcher; public class EventLogFileHandler { @@ -78,7 +82,7 @@ public void writeTmpFile(List dataEntries, long epoch) { * data. * *

If any of the above steps fail, then the tmp file is not deleted from the filesystem. This
-     * is fine as the MetricsPurgeActivity, will eventually clean it. The copies are atomic and
+     * is fine as {@link #deleteFiles(List)} will eventually clean it. The copies are atomic and
      * therefore the reader never reads incompletely written file.
      *
      * @param dataEntries The metrics to be written to file.
@@ -166,4 +170,53 @@ private void readInternal(Path pathToFile, int bufferSize, EventDispatcher proce
             LOG.error("Error reading file", ex);
         }
     }
+
+    /**
+     * Deletes every file in the metrics location whose name does not end with the tmp-file
+     * extension. Does nothing when the metrics location cannot be listed.
+     */
+    public void deleteAllFiles() {
+        LOG.debug("Cleaning up any leftover files.");
+        File root = new File(metricsLocation);
+        // Filter out '.tmp' files, we do not want to delete currBucket .tmp files
+        String[] filesToDelete = root.list((dir, name) -> !name.endsWith(TMP_FILE_EXT));
+        if (filesToDelete == null) {
+            // File#list returns null when root is not a directory or an I/O error occurs;
+            // passing that array to Arrays.asList would throw a NullPointerException.
+            return;
+        }
+        deleteFiles(Arrays.asList(filesToDelete));
+    }
+
+    /**
+     * Removes the named files from the metrics location and records the elapsed time and the
+     * number of files processed as writer metrics.
+     *
+     * The recorded count is the number of entries handed to {@link
+     * PerformanceAnalyzerMetrics#removeMetrics(File)}; that method returns nothing, so removals
+     * that fail inside it are still counted here.
+     *
+     * @param filesToDelete file names relative to the metrics location; {@code null} is a no-op
+     */
+    public void deleteFiles(List<String> filesToDelete) {
+        LOG.debug("Starting to delete old writer files");
+        long startTime = System.currentTimeMillis();
+
+        if (filesToDelete == null) {
+            return;
+        }
+        int filesDeletedCount = 0;
+        File root = new File(metricsLocation);
+        for (String fileToDelete : filesToDelete) {
+            File file = new File(root, fileToDelete);
+            PerformanceAnalyzerMetrics.removeMetrics(file);
+            filesDeletedCount += 1;
+        }
+        long duration = System.currentTimeMillis() - startTime;
+        PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat(
+                WriterMetrics.EVENT_LOG_FILES_DELETION_TIME, "", duration);
+        PerformanceAnalyzerApp.WRITER_METRICS_AGGREGATOR.updateStat(
+                WriterMetrics.EVENT_LOG_FILES_DELETED, "", filesDeletedCount);
+        LOG.debug("'{}' Old writer files cleaned up.", filesDeletedCount);
+    }
 }