From e74c05bfe6732e5e0fd98b60912b34d8462bd43d Mon Sep 17 00:00:00 2001 From: Markus Hintersteiner Date: Mon, 8 Apr 2024 16:48:58 +0200 Subject: [PATCH] Update normalization for Metrics (#3332) * Update normalization for Metrics * Update Changelog * Do not escape control chars, as they get stripped by relay * Update Changelog * Update Changelog --- CHANGELOG.md | 6 ++ sentry/api/sentry.api | 6 +- .../java/io/sentry/metrics/MetricsHelper.java | 84 +++++++++++-------- .../io/sentry/metrics/MetricsHelperTest.kt | 51 +++++++---- 4 files changed, 93 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7019755c21..ea18a80dc4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Unreleased + +## Features + +- Update normalization of metrics keys, tags and values ([#3332](https://github.com/getsentry/sentry-java/pull/3332)) + ## 7.7.0 ### Features diff --git a/sentry/api/sentry.api b/sentry/api/sentry.api index 6d30b9cca7..54f85a9061 100644 --- a/sentry/api/sentry.api +++ b/sentry/api/sentry.api @@ -3534,14 +3534,14 @@ public final class io/sentry/metrics/MetricsHelper { public static fun convertNanosTo (Lio/sentry/MeasurementUnit$Duration;J)D public static fun encodeMetrics (JLjava/util/Collection;Ljava/lang/StringBuilder;)V public static fun getCutoffTimestampMs (J)J - public static fun getDayBucketKey (Ljava/util/Calendar;)J public static fun getExportKey (Lio/sentry/metrics/MetricType;Ljava/lang/String;Lio/sentry/MeasurementUnit;)Ljava/lang/String; public static fun getMetricBucketKey (Lio/sentry/metrics/MetricType;Ljava/lang/String;Lio/sentry/MeasurementUnit;Ljava/util/Map;)Ljava/lang/String; public static fun getTimeBucketKey (J)J public static fun mergeTags (Ljava/util/Map;Ljava/util/Map;)Ljava/util/Map; - public static fun sanitizeKey (Ljava/lang/String;)Ljava/lang/String; + public static fun sanitizeName (Ljava/lang/String;)Ljava/lang/String; + public static fun sanitizeTagKey (Ljava/lang/String;)Ljava/lang/String; + public static fun sanitizeTagValue (Ljava/lang/String;)Ljava/lang/String; public static fun sanitizeUnit (Ljava/lang/String;)Ljava/lang/String; - public static fun sanitizeValue (Ljava/lang/String;)Ljava/lang/String; public static fun setFlushShiftMs (J)V public static fun toStatsdType (Lio/sentry/metrics/MetricType;)Ljava/lang/String; } diff --git a/sentry/src/main/java/io/sentry/metrics/MetricsHelper.java b/sentry/src/main/java/io/sentry/metrics/MetricsHelper.java index e501a35552..f3a56c119e 100644 --- a/sentry/src/main/java/io/sentry/metrics/MetricsHelper.java +++ b/sentry/src/main/java/io/sentry/metrics/MetricsHelper.java @@ -1,13 +1,11 @@ package io.sentry.metrics; import io.sentry.MeasurementUnit; -import java.util.Calendar; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.Random; -import java.util.TimeZone; import java.util.regex.Pattern; import org.jetbrains.annotations.ApiStatus; import org.jetbrains.annotations.NotNull; @@ -20,14 +18,9 @@ public final class MetricsHelper { public static final int MAX_TOTAL_WEIGHT = 100000; private static final int ROLLUP_IN_SECONDS = 10; - private static final Pattern INVALID_KEY_CHARACTERS_PATTERN = - Pattern.compile("[^a-zA-Z0-9_/.-]+"); - private static final Pattern INVALID_VALUE_CHARACTERS_PATTERN = - Pattern.compile("[^\\w\\d\\s_:/@\\.\\{\\}\\[\\]$-]+"); - // See - // https://docs.sysdig.com/en/docs/sysdig-monitor/integrations/working-with-integrations/custom-integrations/integrate-statsd-metrics/#characters-allowed-for-statsd-metric-names - private static final Pattern INVALID_METRIC_UNIT_CHARACTERS_PATTERN = - Pattern.compile("[^a-zA-Z0-9_/.]+"); + private static final Pattern UNIT_PATTERN = Pattern.compile("\\W+"); + private static final Pattern NAME_PATTERN = Pattern.compile("[^\\w\\-.]+"); + private static final Pattern TAG_KEY_PATTERN = Pattern.compile("[^\\w\\-./]+"); private static final char TAGS_PAIR_DELIMITER = ','; // Delimiter between key-value pairs private static final char TAGS_KEY_VALUE_DELIMITER = '='; // Delimiter between key and value @@ -36,15 +29,6 @@ public final class MetricsHelper { private static long FLUSH_SHIFT_MS = (long) (new Random().nextFloat() * (ROLLUP_IN_SECONDS * 1000f)); - public static long getDayBucketKey(final @NotNull Calendar timestamp) { - final Calendar utc = Calendar.getInstance(TimeZone.getTimeZone("UTC")); - utc.set(Calendar.YEAR, timestamp.get(Calendar.YEAR)); - utc.set(Calendar.MONTH, timestamp.get(Calendar.MONTH)); - utc.set(Calendar.DAY_OF_MONTH, timestamp.get(Calendar.DAY_OF_MONTH)); - - return utc.getTimeInMillis() / 1000; - } - public static long getTimeBucketKey(final long timestampMs) { final long seconds = timestampMs / 1000; final long bucketKey = (seconds / ROLLUP_IN_SECONDS) * ROLLUP_IN_SECONDS; @@ -59,12 +43,50 @@ public static long getCutoffTimestampMs(final long nowMs) { return nowMs - (ROLLUP_IN_SECONDS * 1000) - FLUSH_SHIFT_MS; } - public static @NotNull String sanitizeKey(final @NotNull String input) { - return INVALID_KEY_CHARACTERS_PATTERN.matcher(input).replaceAll("_"); + @NotNull + public static String sanitizeUnit(final @NotNull String unit) { + return UNIT_PATTERN.matcher(unit).replaceAll(""); + } + + @NotNull + public static String sanitizeName(final @NotNull String input) { + return NAME_PATTERN.matcher(input).replaceAll("_"); } - public static String sanitizeValue(final @NotNull String input) { - return INVALID_VALUE_CHARACTERS_PATTERN.matcher(input).replaceAll(""); + @NotNull + public static String sanitizeTagKey(final @NotNull String input) { + return TAG_KEY_PATTERN.matcher(input).replaceAll(""); + } + + @NotNull + public static String sanitizeTagValue(final @NotNull String input) { + // see https://develop.sentry.dev/sdk/metrics/#tag-values-replacement-map + // Line feed -> \n + // Carriage return -> \r + // Tab -> \t + // Backslash -> \\ + // Pipe -> \\u{7c} + // Comma -> \\u{2c} + final StringBuilder output = new StringBuilder(input.length()); + for (int idx = 0; idx < input.length(); idx++) { + final char ch = input.charAt(idx); + if (ch == '\n') { + output.append("\\n"); + } else if (ch == '\r') { + output.append("\\r"); + } else if (ch == '\t') { + output.append("\\t"); + } else if (ch == '\\') { + output.append("\\\\"); + } else if (ch == '|') { + output.append("\\u{7c}"); + } else if (ch == ',') { + output.append("\\u{2c}"); + } else { + output.append(ch); + } + } + return output.toString(); } public static @NotNull String toStatsdType(final @NotNull MetricType type) { @@ -89,7 +111,7 @@ public static String getMetricBucketKey( final @Nullable MeasurementUnit unit, final @Nullable Map tags) { final @NotNull String typePrefix = toStatsdType(type); - final @NotNull String serializedTags = GetTagsKey(tags); + final @NotNull String serializedTags = getTagsKey(tags); final @NotNull String unitName = getUnitName(unit); return String.format("%s_%s_%s_%s", typePrefix, metricKey, unitName, serializedTags); @@ -100,7 +122,8 @@ private static String getUnitName(final @Nullable MeasurementUnit unit) { return (unit != null) ? unit.apiName() : MeasurementUnit.NONE; } - private static String GetTagsKey(final @Nullable Map tags) { + @NotNull + private static String getTagsKey(final @Nullable Map tags) { if (tags == null || tags.isEmpty()) { return ""; } @@ -197,7 +220,7 @@ public static void encodeMetrics( final @NotNull Collection metrics, final @NotNull StringBuilder writer) { for (Metric metric : metrics) { - writer.append(sanitizeKey(metric.getKey())); + writer.append(sanitizeName(metric.getKey())); writer.append("@"); final @Nullable MeasurementUnit unit = metric.getUnit(); @@ -218,7 +241,7 @@ public static void encodeMetrics( writer.append("|#"); boolean first = true; for (final @NotNull Map.Entry tag : tags.entrySet()) { - final @NotNull String tagKey = sanitizeKey(tag.getKey()); + final @NotNull String tagKey = sanitizeTagKey(tag.getKey()); if (first) { first = false; } else { @@ -226,7 +249,7 @@ public static void encodeMetrics( } writer.append(tagKey); writer.append(":"); - writer.append(sanitizeValue(tag.getValue())); + writer.append(sanitizeTagValue(tag.getValue())); } } @@ -236,11 +259,6 @@ public static void encodeMetrics( } } - @NotNull - public static String sanitizeUnit(@NotNull String unit) { - return INVALID_METRIC_UNIT_CHARACTERS_PATTERN.matcher(unit).replaceAll("_"); - } - @NotNull public static Map mergeTags( final @Nullable Map tags, final @NotNull Map defaultTags) { diff --git a/sentry/src/test/java/io/sentry/metrics/MetricsHelperTest.kt b/sentry/src/test/java/io/sentry/metrics/MetricsHelperTest.kt index 089f805a60..a164b38d7b 100644 --- a/sentry/src/test/java/io/sentry/metrics/MetricsHelperTest.kt +++ b/sentry/src/test/java/io/sentry/metrics/MetricsHelperTest.kt @@ -59,31 +59,46 @@ class MetricsHelperTest { } @Test - fun sanitizeKey() { - assertEquals("foo-bar", MetricsHelper.sanitizeKey("foo-bar")) - assertEquals("foo_bar", MetricsHelper.sanitizeKey("foo\$\$\$bar")) - assertEquals("fo_-bar", MetricsHelper.sanitizeKey("foö-bar")) + fun sanitizeName() { + assertEquals("foo-bar", MetricsHelper.sanitizeName("foo-bar")) + assertEquals("foo_bar", MetricsHelper.sanitizeName("foo\$\$\$bar")) + assertEquals("fo_-bar", MetricsHelper.sanitizeName("foö-bar")) } @Test - fun sanitizeValue() { - assertEquals("\$foo", MetricsHelper.sanitizeValue("%\$foo")) - assertEquals("blah{}", MetricsHelper.sanitizeValue("blah{}")) - assertEquals("snwmn", MetricsHelper.sanitizeValue("snöwmän")) - assertEquals("j e n g a", MetricsHelper.sanitizeValue("j e n g a!")) + fun sanitizeTagKey() { + // no replacement characters for tag keys + // - and / should be allowed + assertEquals("a/weird/tag-key/", MetricsHelper.sanitizeTagKey("a/weird/tag-key/:ä")) } @Test - fun sanitizeUnit() { - val items = listOf( - "Test123_." to "Test123_.", - "test{value}" to "test_value_", - "test-value" to "test_value" - ) + fun sanitizeTagValue() { + // https://github.com/getsentry/relay/blob/3208e3ce5b1fe4d147aa44e0e966807c256993de/relay-metrics/src/protocol.rs#L142 + assertEquals("plain", MetricsHelper.sanitizeTagValue("plain")) + assertEquals("plain text", MetricsHelper.sanitizeTagValue("plain text")) + assertEquals("plain%text", MetricsHelper.sanitizeTagValue("plain%text")) + + // Escape sequences + assertEquals("plain \\\\ text", MetricsHelper.sanitizeTagValue("plain \\ text")) + assertEquals("plain\\u{2c}text", MetricsHelper.sanitizeTagValue("plain,text")) + assertEquals("plain\\u{7c}text", MetricsHelper.sanitizeTagValue("plain|text")) + assertEquals("plain 😅", MetricsHelper.sanitizeTagValue("plain 😅")) + + // Escapable control characters (may be stripped by the parser) + assertEquals("plain\\ntext", MetricsHelper.sanitizeTagValue("plain\ntext")) + assertEquals("plain\\rtext", MetricsHelper.sanitizeTagValue("plain\rtext")) + assertEquals("plain\\ttext", MetricsHelper.sanitizeTagValue("plain\ttext")) + + // Unescapable control characters remain, as they'll be stripped by relay + assertEquals("plain\u0007text", MetricsHelper.sanitizeTagValue("plain\u0007text")) + assertEquals("plain\u009Ctext", MetricsHelper.sanitizeTagValue("plain\u009ctext")) + } - for (item in items) { - assertEquals(item.second, MetricsHelper.sanitizeUnit(item.first)) - } + @Test + fun sanitizeUnit() { + // no replacement characters for units + assertEquals("abcABC123_abcABC123", MetricsHelper.sanitizeUnit("abcABC123_-./äöü\$%&abcABC123")) } @Test