feat(ndk): add support for reading modified-utf8 sequences from `Byte…

…Buffers`
bugsnag · Jan 25, 2024 · 9fbc9ea · 9fbc9ea
1 parent 6f5ad4b
commit 9fbc9ea
Show file tree

Hide file tree

Showing 3 changed files with 203 additions and 3 deletions.
diff --git a/bugsnag-plugin-android-ndk/src/main/java/com/bugsnag/android/ndk/ByteBufferExtensions.kt b/bugsnag-plugin-android-ndk/src/main/java/com/bugsnag/android/ndk/ByteBufferExtensions.kt
@@ -1,11 +1,106 @@
 package com.bugsnag.android.ndk
 
 import java.nio.ByteBuffer
+import kotlin.math.min
+
+private const val UTF_REPLACEMENT_CHAR = '\uFFFD'
 
 internal fun ByteBuffer.getNativeInt(): Int = getInt()
 internal fun ByteBuffer.getNativeLong(): Long = getLong()
 
-internal fun ByteBuffer.getCString(byteCount: Int): String {
-    position(position() + byteCount)
-    return ""
+/**
+ * Decode up to [allocatedByteCount] bytes as a null-terminated sequence of modified UTF-8 bytes.
+ * This reads the same format as the JNI `NewStringUTF` function, but also obeys the
+ * null-terminator character used in C.
+ *
+ * The position of this `ByteBuffer` is always advanced by `min(allocatedByteCount, remaining())`
+ * bytes, even when the returned `String` has fewer (or even zero) characters.
+ *
+ * Invalid UTF-8 is never reported as an error:
+ * - a bad continuation byte emits U+FFFD followed by the (mis)decoded character, and decoding
+ *   then continues with the next sequence
+ * - a sequence cut off by the end of the readable bytes, or a lead byte that does not start a
+ *   1, 2 or 3 byte sequence, emits U+FFFD and ends the string
+ *
+ * @param allocatedByteCount the number of bytes to consume from this buffer (capped at
+ *   `remaining()`)
+ * @return the characters decoded up to the first null-terminator, unrecoverable sequence, or the
+ *   end of the readable bytes
+ */
+internal fun ByteBuffer.getCString(allocatedByteCount: Int): String {
+    val origin = position()
+    val maxBytes = min(allocatedByteCount, remaining())
+
+    // allocate a CharArray to handle the decoded string
+    // it can't be longer than the number of bytes in the buffer
+    val chars = CharArray(maxBytes)
+    var bytesRead = 0
+    var outIndex = 0
+    var c = 0
+
+    // fast path for ASCII-7 compatible characters / strings
+    // all reads below use absolute indexes, the position is only moved once at the very end
+    while (bytesRead < maxBytes) {
+        c = get(origin + bytesRead).toInt() and 0xff
+        if (c >= 128) break // we need to take the "slow" path
+        if (c == 0) break // null-terminator - this is the end of the string
+
+        chars[outIndex++] = c.toChar()
+        bytesRead++
+    }
+
+    // make sure we didn't previously reach the end of the string
+    if (c != 0) {
+        while (bytesRead < maxBytes) {
+            c = get(origin + bytesRead).toInt() and 0xff
+            if (c == 0) {
+                // null-terminator - this is the end of the string
+                break
+            }
+
+            // the high nibble of the lead byte selects the length of the sequence
+            when (c shr 4) {
+                0, 1, 2, 3, 4, 5, 6, 7 -> {
+                    /* 0xxxxxxx*/
+                    bytesRead++
+                    chars[outIndex++] = c.toChar()
+                }
+
+                12, 13 -> {
+                    /* 110x xxxx   10xx xxxx*/
+                    bytesRead += 2
+                    if (bytesRead > maxBytes) {
+                        // Truncated sequence - we don't error out, we return what we *do* have
+                        chars[outIndex++] = UTF_REPLACEMENT_CHAR
+                        break
+                    }
+
+                    val char2 = get(origin + bytesRead - 1).toInt() and 0xff
+                    if (char2 and 0xc0 != 0x80) {
+                        // Invalid continuation byte - we don't error out: emit U+FFFD here, the
+                        // (mis)decoded character is still emitted below and decoding continues
+                        chars[outIndex++] = UTF_REPLACEMENT_CHAR
+                    }
+
+                    chars[outIndex++] = ((c and 0x1f shl 6) or (char2 and 0x3f)).toChar()
+                }
+
+                14 -> {
+                    /* 1110 xxxx  10xx xxxx  10xx xxxx */
+                    bytesRead += 3
+                    if (bytesRead > maxBytes) {
+                        // Truncated sequence - we don't error out, we return what we *do* have
+                        chars[outIndex++] = UTF_REPLACEMENT_CHAR
+                        break
+                    }
+
+                    val char2 = get(origin + bytesRead - 2).toInt() and 0xff
+                    val char3 = get(origin + bytesRead - 1).toInt() and 0xff
+                    if (char2 and 0xc0 != 0x80 || char3 and 0xc0 != 0x80) {
+                        // Invalid continuation byte - we don't error out: emit U+FFFD here, the
+                        // (mis)decoded character is still emitted below and decoding continues
+                        chars[outIndex++] = UTF_REPLACEMENT_CHAR
+                    }
+
+                    chars[outIndex++] =
+                        ((c and 0x0f shl 12) or (char2 and 0x3f shl 6) or (char3 and 0x3f)).toChar()
+                }
+
+                else -> {
+                    // Invalid UTF-8 - but we don't error out, we return what we *do* have
+                    chars[outIndex++] = UTF_REPLACEMENT_CHAR
+                    break
+                }
+            }
+        }
+    }
+
+    // move the ByteBuffer position to after the string
+    position(origin + maxBytes)
+    return String(chars, 0, outIndex)
 }
diff --git a/bugsnag-plugin-android-ndk/src/main/java/com/bugsnag/android/ndk/NativeEventDecoder.kt b/bugsnag-plugin-android-ndk/src/main/java/com/bugsnag/android/ndk/NativeEventDecoder.kt
@@ -25,7 +25,31 @@ internal object NativeEventDecoder {
         eventBytes: ByteBuffer
     ): Event {
         eventBytes.order(ByteOrder.nativeOrder())
+
+        val header = decodeHeader(eventBytes)
+        if (header.version != 13) {
+            throw IllegalArgumentException("Unsupported event version: ${header.version}")
+        }
+
+        if (header.bigEndian == 0) {
+            eventBytes.order(ByteOrder.BIG_ENDIAN)
+        }
+
         @Suppress("StopShip") // This is targeting an integration branch
         TODO("To be completed")
     }
+
+    /**
+     * Reads the event header from the current position of [eventBytes]: two native ints
+     * followed by a 64 byte C string. Each read advances the buffer, so the order of the
+     * reads is significant.
+     */
+    private fun decodeHeader(eventBytes: ByteBuffer): NativeEventHeader {
+        val version = eventBytes.getNativeInt()
+        val bigEndian = eventBytes.getNativeInt()
+        val osBuild = eventBytes.getCString(64)
+        return NativeEventHeader(version, bigEndian, osBuild)
+    }
+
+    /**
+     * The values read by [decodeHeader] from the start of a native event.
+     *
+     * @property version the first native int of the header; the decoder rejects any value
+     *   other than `13`
+     * @property bigEndian the second native int of the header.
+     *   NOTE(review): the decoder switches the buffer to `BIG_ENDIAN` when this is `0` -
+     *   confirm the polarity of this flag against the native writer
+     * @property osBuild the string decoded from the 64 bytes that follow the two ints
+     */
+    private data class NativeEventHeader(
+        val version: Int,
+        val bigEndian: Int,
+        val osBuild: String
+    )
 }
diff --git a/bugsnag-plugin-android-ndk/src/test/java/com/bugsnag/android/ndk/CStringDecoderTest.kt b/bugsnag-plugin-android-ndk/src/test/java/com/bugsnag/android/ndk/CStringDecoderTest.kt
@@ -0,0 +1,81 @@
+package com.bugsnag.android.ndk
+
+import org.junit.Assert.assertEquals
+import org.junit.Test
+import java.nio.ByteBuffer
+
+/**
+ * Tests for the `ByteBuffer.getCString` decoder: plain ASCII, empty input, multi-byte
+ * sequences, and its handling of truncated or corrupted sequences.
+ */
+class CStringDecoderTest {
+    @Test
+    fun testAscii7Compatible() {
+        val buffer = ByteBuffer.wrap(
+            byteArrayOf(
+                0x63, 0x6f, 0x6d, 0x2e, 0x65, 0x78, 0x61, 0x6d,
+                0x70, 0x6c, 0x65, 0x2e, 0x62, 0x75, 0x67, 0x73,
+                0x6e, 0x61, 0x67, 0x2e, 0x61, 0x6e, 0x64, 0x72,
+                0x6f, 0x69, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00
+            )
+        )
+
+        assertEquals("com.example.bugsnag.android", buffer.getCString(buffer.remaining()))
+    }
+
+    @Test
+    fun testEmptyString() {
+        // a leading null-terminator gives an empty string, but every byte is still consumed
+        val buffer = ByteBuffer.wrap(ByteArray(16))
+        assertEquals("", buffer.getCString(buffer.remaining()))
+        assertEquals(0, buffer.remaining())
+
+        // a zero-length read also decodes to an empty string
+        assertEquals("", ByteBuffer.allocate(0).getCString(0))
+    }
+
+    @Test
+    fun testNonAscii7Compatible() {
+        val buffer = ByteBuffer.wrap(extendedBytes)
+        assertEquals("はい、これは機械翻訳で書かれています", buffer.getCString(buffer.remaining()))
+    }
+
+    @Test
+    fun testInvalidStrings() {
+        val buffer = ByteBuffer.wrap(extendedBytes)
+        // stopping one byte short of the terminator cuts the final 3-byte sequence in half
+        assertEquals("はい、これは機械翻訳で書かれていま�", buffer.getCString(extendedBytes.indexOf(0) - 1))
+
+        buffer.rewind()
+        // replace a continuation byte with an ASCII space (this writes through to extendedBytes)
+        buffer.put(16, 32)
+        assertEquals("はい、これ�㠯機械翻訳で書かれていま�", buffer.getCString(extendedBytes.indexOf(0) - 1))
+    }
+
+    @Test
+    fun testGreekStrings() {
+        val buffer = ByteBuffer.wrap(greekBytes)
+        assertEquals("ναι, αυτό γράφτηκε με αυτόματη μετάφραση", buffer.getCString(buffer.remaining()))
+    }
+
+    @Test
+    fun testInvalidGreekStrings() {
+        val buffer = ByteBuffer.wrap(greekBytes)
+        // stopping one byte short of the terminator cuts the final 2-byte sequence in half
+        assertEquals("ναι, αυτό γράφτηκε με αυτόματη μετάφρασ�", buffer.getCString(greekBytes.indexOf(0) - 1))
+
+        buffer.rewind()
+        // replace a continuation byte with an ASCII space (this writes through to greekBytes)
+        buffer.put(9, 32)
+        assertEquals("ναι, �Πυτό γράφτηκε με αυτόματη μετάφρασ�", buffer.getCString(greekBytes.indexOf(0) - 1))
+    }
+
+    // the Greek sentence asserted in testGreekStrings as 2-byte sequences (plus ASCII
+    // punctuation), followed by 16 null bytes
+    private val greekBytes = byteArrayOf(
+        -50, -67, -50, -79, -50, -71, 44, 32,
+        -50, -79, -49, -123, -49, -124, -49, -116,
+        32, -50, -77, -49, -127, -50, -84, -49,
+        -122, -49, -124, -50, -73, -50, -70, -50,
+        -75, 32, -50, -68, -50, -75, 32, -50,
+        -79, -49, -123, -49, -124, -49, -116, -50,
+        -68, -50, -79, -49, -124, -50, -73, 32,
+        -50, -68, -50, -75, -49, -124, -50, -84,
+        -49, -122, -49, -127, -50, -79, -49, -125,
+        -50, -73,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    )
+
+    // the Japanese sentence asserted in testNonAscii7Compatible as 3-byte sequences,
+    // followed by 16 null bytes
+    private val extendedBytes = byteArrayOf(
+        -29, -127, -81, -29, -127, -124, -29, -128,
+        -127, -29, -127, -109, -29, -126, -116, -29,
+        -127, -81, -26, -87, -97, -26, -94, -80,
+        -25, -65, -69, -24, -88, -77, -29, -127,
+        -89, -26, -101, -72, -29, -127, -117, -29,
+        -126, -116, -29, -127, -90, -29, -127, -124,
+        -29, -127, -66, -29, -127, -103,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    )
+}