From 6d552f9e21dafced759c4b62ad4e671dbe8eede7 Mon Sep 17 00:00:00 2001 From: Markus Hintersteiner Date: Mon, 8 Apr 2024 09:06:03 +0200 Subject: [PATCH 1/5] Update normalization for Metrics --- sentry/api/sentry.api | 6 +- .../java/io/sentry/metrics/MetricsHelper.java | 82 ++++++++++++------- .../io/sentry/metrics/MetricsHelperTest.kt | 51 ++++++++---- 3 files changed, 87 insertions(+), 52 deletions(-) diff --git a/sentry/api/sentry.api b/sentry/api/sentry.api index bcfc1c9194..18d02f7436 100644 --- a/sentry/api/sentry.api +++ b/sentry/api/sentry.api @@ -3530,14 +3530,14 @@ public final class io/sentry/metrics/MetricsHelper { public static fun convertNanosTo (Lio/sentry/MeasurementUnit$Duration;J)D public static fun encodeMetrics (JLjava/util/Collection;Ljava/lang/StringBuilder;)V public static fun getCutoffTimestampMs (J)J - public static fun getDayBucketKey (Ljava/util/Calendar;)J public static fun getExportKey (Lio/sentry/metrics/MetricType;Ljava/lang/String;Lio/sentry/MeasurementUnit;)Ljava/lang/String; public static fun getMetricBucketKey (Lio/sentry/metrics/MetricType;Ljava/lang/String;Lio/sentry/MeasurementUnit;Ljava/util/Map;)Ljava/lang/String; public static fun getTimeBucketKey (J)J public static fun mergeTags (Ljava/util/Map;Ljava/util/Map;)Ljava/util/Map; - public static fun sanitizeKey (Ljava/lang/String;)Ljava/lang/String; + public static fun sanitizeName (Ljava/lang/String;)Ljava/lang/String; + public static fun sanitizeTagKey (Ljava/lang/String;)Ljava/lang/String; + public static fun sanitizeTagValue (Ljava/lang/String;)Ljava/lang/String; public static fun sanitizeUnit (Ljava/lang/String;)Ljava/lang/String; - public static fun sanitizeValue (Ljava/lang/String;)Ljava/lang/String; public static fun setFlushShiftMs (J)V public static fun toStatsdType (Lio/sentry/metrics/MetricType;)Ljava/lang/String; } diff --git a/sentry/src/main/java/io/sentry/metrics/MetricsHelper.java b/sentry/src/main/java/io/sentry/metrics/MetricsHelper.java index e501a35552..f83d085800 100644 --- a/sentry/src/main/java/io/sentry/metrics/MetricsHelper.java +++ b/sentry/src/main/java/io/sentry/metrics/MetricsHelper.java @@ -1,13 +1,11 @@ package io.sentry.metrics; import io.sentry.MeasurementUnit; -import java.util.Calendar; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.Random; -import java.util.TimeZone; import java.util.regex.Pattern; import org.jetbrains.annotations.ApiStatus; import org.jetbrains.annotations.NotNull; @@ -20,14 +18,9 @@ public final class MetricsHelper { public static final int MAX_TOTAL_WEIGHT = 100000; private static final int ROLLUP_IN_SECONDS = 10; - private static final Pattern INVALID_KEY_CHARACTERS_PATTERN = - Pattern.compile("[^a-zA-Z0-9_/.-]+"); - private static final Pattern INVALID_VALUE_CHARACTERS_PATTERN = - Pattern.compile("[^\\w\\d\\s_:/@\\.\\{\\}\\[\\]$-]+"); - // See - // https://docs.sysdig.com/en/docs/sysdig-monitor/integrations/working-with-integrations/custom-integrations/integrate-statsd-metrics/#characters-allowed-for-statsd-metric-names - private static final Pattern INVALID_METRIC_UNIT_CHARACTERS_PATTERN = - Pattern.compile("[^a-zA-Z0-9_/.]+"); + private static final Pattern UNIT_PATTERN = Pattern.compile("\\W+"); + private static final Pattern NAME_PATTERN = Pattern.compile("[^\\w\\-.]+"); + private static final Pattern TAG_KEY_PATTERN = Pattern.compile("[^\\w\\-./]+"); private static final char TAGS_PAIR_DELIMITER = ','; // Delimiter between key-value pairs private static final char TAGS_KEY_VALUE_DELIMITER = '='; // Delimiter between key and value @@ -36,15 +29,6 @@ public final class MetricsHelper { private static long FLUSH_SHIFT_MS = (long) (new Random().nextFloat() * (ROLLUP_IN_SECONDS * 1000f)); - public static long getDayBucketKey(final @NotNull Calendar timestamp) { - final Calendar utc = Calendar.getInstance(TimeZone.getTimeZone("UTC")); - utc.set(Calendar.YEAR, timestamp.get(Calendar.YEAR)); - utc.set(Calendar.MONTH, timestamp.get(Calendar.MONTH)); - utc.set(Calendar.DAY_OF_MONTH, timestamp.get(Calendar.DAY_OF_MONTH)); - - return utc.getTimeInMillis() / 1000; - } - public static long getTimeBucketKey(final long timestampMs) { final long seconds = timestampMs / 1000; final long bucketKey = (seconds / ROLLUP_IN_SECONDS) * ROLLUP_IN_SECONDS; @@ -59,12 +43,53 @@ public static long getCutoffTimestampMs(final long nowMs) { return nowMs - (ROLLUP_IN_SECONDS * 1000) - FLUSH_SHIFT_MS; } - public static @NotNull String sanitizeKey(final @NotNull String input) { - return INVALID_KEY_CHARACTERS_PATTERN.matcher(input).replaceAll("_"); + @NotNull + public static String sanitizeUnit(final @NotNull String unit) { + return UNIT_PATTERN.matcher(unit).replaceAll(""); + } + + @NotNull + public static String sanitizeName(final @NotNull String input) { + return NAME_PATTERN.matcher(input).replaceAll("_"); + } + + @NotNull + public static String sanitizeTagKey(final @NotNull String input) { + return TAG_KEY_PATTERN.matcher(input).replaceAll(""); } - public static String sanitizeValue(final @NotNull String input) { - return INVALID_VALUE_CHARACTERS_PATTERN.matcher(input).replaceAll(""); + @NotNull + public static String sanitizeTagValue(final @NotNull String input) { + // see https://develop.sentry.dev/sdk/metrics/#tag-values-replacement-map + // see + // https://github.com/getsentry/relay/blob/3208e3ce5b1fe4d147aa44e0e966807c256993de/relay-metrics/src/protocol.rs#L51 + // Control codes -> stripped + // Line feed -> \n + // Carriage return -> \r + // Tab -> \t + // Backslash -> \\ + // Pipe -> \\u{7c} + // Comma -> \\u{2c} + final StringBuilder output = new StringBuilder(input.length()); + for (int idx = 0; idx < input.length(); idx++) { + final char ch = input.charAt(idx); + if (ch == '\n') { + output.append("\\n"); + } else if (ch == '\r') { + output.append("\\r"); + } else if (ch == '\t') { + output.append("\\t"); + } else if (ch == '\\') { + output.append("\\\\"); + } else if (ch == '|') { + output.append("\\u{7c}"); + } else if (ch == ',') { + output.append("\\u{2c}"); + } else if (!Character.isISOControl(ch)) { // control codes are simply stripped + output.append(ch); + } + } + return output.toString(); } public static @NotNull String toStatsdType(final @NotNull MetricType type) { @@ -197,7 +222,7 @@ public static void encodeMetrics( final @NotNull Collection metrics, final @NotNull StringBuilder writer) { for (Metric metric : metrics) { - writer.append(sanitizeKey(metric.getKey())); + writer.append(sanitizeName(metric.getKey())); writer.append("@"); final @Nullable MeasurementUnit unit = metric.getUnit(); @@ -218,7 +243,7 @@ public static void encodeMetrics( writer.append("|#"); boolean first = true; for (final @NotNull Map.Entry tag : tags.entrySet()) { - final @NotNull String tagKey = sanitizeKey(tag.getKey()); + final @NotNull String tagKey = sanitizeTagKey(tag.getKey()); if (first) { first = false; } else { @@ -226,7 +251,7 @@ public static void encodeMetrics( } writer.append(tagKey); writer.append(":"); - writer.append(sanitizeValue(tag.getValue())); + writer.append(sanitizeTagValue(tag.getValue())); } } @@ -236,11 +261,6 @@ public static void encodeMetrics( } } - @NotNull - public static String sanitizeUnit(@NotNull String unit) { - return INVALID_METRIC_UNIT_CHARACTERS_PATTERN.matcher(unit).replaceAll("_"); - } - @NotNull public static Map mergeTags( final @Nullable Map tags, final @NotNull Map defaultTags) { diff --git a/sentry/src/test/java/io/sentry/metrics/MetricsHelperTest.kt b/sentry/src/test/java/io/sentry/metrics/MetricsHelperTest.kt index 089f805a60..f354ad5014 100644 --- a/sentry/src/test/java/io/sentry/metrics/MetricsHelperTest.kt +++ b/sentry/src/test/java/io/sentry/metrics/MetricsHelperTest.kt @@ -59,31 +59,46 @@ class MetricsHelperTest { } @Test - fun sanitizeKey() { - assertEquals("foo-bar", MetricsHelper.sanitizeKey("foo-bar")) - assertEquals("foo_bar", MetricsHelper.sanitizeKey("foo\$\$\$bar")) - assertEquals("fo_-bar", MetricsHelper.sanitizeKey("foö-bar")) + fun sanitizeName() { + assertEquals("foo-bar", MetricsHelper.sanitizeName("foo-bar")) + assertEquals("foo_bar", MetricsHelper.sanitizeName("foo\$\$\$bar")) + assertEquals("fo_-bar", MetricsHelper.sanitizeName("foö-bar")) } @Test - fun sanitizeValue() { - assertEquals("\$foo", MetricsHelper.sanitizeValue("%\$foo")) - assertEquals("blah{}", MetricsHelper.sanitizeValue("blah{}")) - assertEquals("snwmn", MetricsHelper.sanitizeValue("snöwmän")) - assertEquals("j e n g a", MetricsHelper.sanitizeValue("j e n g a!")) + fun sanitizeTagKey() { + // no replacement characters for tag keys + // - and / should be allowed + assertEquals("a/weird/tag-key/", MetricsHelper.sanitizeTagKey("a/weird/tag-key/:ä")) } @Test - fun sanitizeUnit() { - val items = listOf( - "Test123_." to "Test123_.", - "test{value}" to "test_value_", - "test-value" to "test_value" - ) + fun sanitizeTagValue() { + // https://github.com/getsentry/relay/blob/3208e3ce5b1fe4d147aa44e0e966807c256993de/relay-metrics/src/protocol.rs#L142 + assertEquals("plain", MetricsHelper.sanitizeTagValue("plain")) + assertEquals("plain text", MetricsHelper.sanitizeTagValue("plain text")) + assertEquals("plain%text", MetricsHelper.sanitizeTagValue("plain%text")) + + // Escape sequences + assertEquals("plain \\\\ text", MetricsHelper.sanitizeTagValue("plain \\ text")) + assertEquals("plain\\u{2c}text", MetricsHelper.sanitizeTagValue("plain,text")) + assertEquals("plain\\u{7c}text", MetricsHelper.sanitizeTagValue("plain|text")) + assertEquals("plain 😅", MetricsHelper.sanitizeTagValue("plain 😅")) + + // Escapable control characters (may be stripped by the parser) + assertEquals("plain\\ntext", MetricsHelper.sanitizeTagValue("plain\ntext")) + assertEquals("plain\\rtext", MetricsHelper.sanitizeTagValue("plain\rtext")) + assertEquals("plain\\ttext", MetricsHelper.sanitizeTagValue("plain\ttext")) + + // Unescapable control characters + assertEquals("plaintext", MetricsHelper.sanitizeTagValue("plain\u0007text")) + assertEquals("plaintext", MetricsHelper.sanitizeTagValue("plain\u009ctext")) + } - for (item in items) { - assertEquals(item.second, MetricsHelper.sanitizeUnit(item.first)) - } + @Test + fun sanitizeUnit() { + // no replacement characters for units + assertEquals("abcABC123_abcABC123", MetricsHelper.sanitizeUnit("abcABC123_-./äöü\$%&abcABC123")) } @Test From 60de6cb694e37a484b6d90d078e43a661f54c982 Mon Sep 17 00:00:00 2001 From: Markus Hintersteiner Date: Mon, 8 Apr 2024 09:08:49 +0200 Subject: [PATCH 2/5] Update Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 214ae221e7..e0fead0679 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Features - Add support for Spring Rest Client ([#3199](https://github.com/getsentry/sentry-java/pull/3199)) +- Metrics: Update normalization of keys, tags and values ([#3332](https://github.com/getsentry/sentry-java/pull/3332)) ### Fixes From ad2f6eec88703ed735e4ca1a08633ab8f8eff522 Mon Sep 17 00:00:00 2001 From: Markus Hintersteiner Date: Mon, 8 Apr 2024 10:10:51 +0200 Subject: [PATCH 3/5] Do not escape control chars, as they get stripped by relay --- .../src/main/java/io/sentry/metrics/MetricsHelper.java | 10 ++++------ .../test/java/io/sentry/metrics/MetricsHelperTest.kt | 6 +++--- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/sentry/src/main/java/io/sentry/metrics/MetricsHelper.java b/sentry/src/main/java/io/sentry/metrics/MetricsHelper.java index f83d085800..f3a56c119e 100644 --- a/sentry/src/main/java/io/sentry/metrics/MetricsHelper.java +++ b/sentry/src/main/java/io/sentry/metrics/MetricsHelper.java @@ -61,9 +61,6 @@ public static String sanitizeTagKey(final @NotNull String input) { @NotNull public static String sanitizeTagValue(final @NotNull String input) { // see https://develop.sentry.dev/sdk/metrics/#tag-values-replacement-map - // see - // https://github.com/getsentry/relay/blob/3208e3ce5b1fe4d147aa44e0e966807c256993de/relay-metrics/src/protocol.rs#L51 - // Control codes -> stripped // Line feed -> \n // Carriage return -> \r // Tab -> \t @@ -85,7 +82,7 @@ public static String sanitizeTagValue(final @NotNull String input) { output.append("\\u{7c}"); } else if (ch == ',') { output.append("\\u{2c}"); - } else if (!Character.isISOControl(ch)) { // control codes are simply stripped + } else { output.append(ch); } } @@ -114,7 +111,7 @@ public static String getMetricBucketKey( final @Nullable MeasurementUnit unit, final @Nullable Map tags) { final @NotNull String typePrefix = toStatsdType(type); - final @NotNull String serializedTags = GetTagsKey(tags); + final @NotNull String serializedTags = getTagsKey(tags); final @NotNull String unitName = getUnitName(unit); return String.format("%s_%s_%s_%s", typePrefix, metricKey, unitName, serializedTags); @@ -125,7 +122,8 @@ private static String getUnitName(final @Nullable MeasurementUnit unit) { return (unit != null) ? unit.apiName() : MeasurementUnit.NONE; } - private static String GetTagsKey(final @Nullable Map tags) { + @NotNull + private static String getTagsKey(final @Nullable Map tags) { if (tags == null || tags.isEmpty()) { return ""; } diff --git a/sentry/src/test/java/io/sentry/metrics/MetricsHelperTest.kt b/sentry/src/test/java/io/sentry/metrics/MetricsHelperTest.kt index f354ad5014..a164b38d7b 100644 --- a/sentry/src/test/java/io/sentry/metrics/MetricsHelperTest.kt +++ b/sentry/src/test/java/io/sentry/metrics/MetricsHelperTest.kt @@ -90,9 +90,9 @@ class MetricsHelperTest { assertEquals("plain\\rtext", MetricsHelper.sanitizeTagValue("plain\rtext")) assertEquals("plain\\ttext", MetricsHelper.sanitizeTagValue("plain\ttext")) - // Unescapable control characters - assertEquals("plaintext", MetricsHelper.sanitizeTagValue("plain\u0007text")) - assertEquals("plaintext", MetricsHelper.sanitizeTagValue("plain\u009ctext")) + // Unescapable control characters remain, as they'll be stripped by relay + assertEquals("plain\u0007text", MetricsHelper.sanitizeTagValue("plain\u0007text")) + assertEquals("plain\u009Ctext", MetricsHelper.sanitizeTagValue("plain\u009ctext")) } @Test From ab7cf94448ad0e5589cab49201c29c490d527c81 Mon Sep 17 00:00:00 2001 From: Markus Hintersteiner Date: Mon, 8 Apr 2024 15:53:14 +0200 Subject: [PATCH 4/5] Update Changelog --- CHANGELOG.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e1923fe24..d117e751eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,18 @@ # Changelog +## Unreleased + +## Features + +- Update normalization of metrics keys, tags and values ([#3332](https://github.com/getsentry/sentry-java/pull/3332)) + + ## 7.7.0 ### Features - Add support for Spring Rest Client ([#3199](https://github.com/getsentry/sentry-java/pull/3199)) - Extend Proxy options with proxy type ([#3326](https://github.com/getsentry/sentry-java/pull/3326)) -- Update normalization of metrics keys, tags and values ([#3332](https://github.com/getsentry/sentry-java/pull/3332)) ### Fixes From 7bfdc47552e837b9c80ddb218759f60119fc2ff9 Mon Sep 17 00:00:00 2001 From: Markus Hintersteiner Date: Mon, 8 Apr 2024 15:53:58 +0200 Subject: [PATCH 5/5] Update Changelog --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d117e751eb..ea18a80dc4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,6 @@ - Update normalization of metrics keys, tags and values ([#3332](https://github.com/getsentry/sentry-java/pull/3332)) - ## 7.7.0 ### Features