Skip to content

Commit

Permalink
feat(ndk): add support for reading modified-utf8 sequences from `Byte…
Browse files Browse the repository at this point in the history
…Buffers`
  • Loading branch information
lemnik committed Jan 25, 2024
1 parent 39d7246 commit 91d906a
Show file tree
Hide file tree
Showing 3 changed files with 204 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ package com.bugsnag.android.ndk

import com.bugsnag.android.NativeInterface
import java.nio.ByteBuffer
import kotlin.math.min

private const val UTF_REPLACEMENT_CHAR = '\uFFFD'

private val is32bit: Boolean by lazy {
val abis = NativeInterface.getCpuAbi()
Expand All @@ -11,7 +14,99 @@ private val is32bit: Boolean by lazy {
/**
 * Relative read of the next `Int` from this buffer; delegates directly to [ByteBuffer.getInt],
 * so the value is interpreted using the buffer's current byte order.
 */
internal fun ByteBuffer.getNativeInt(): Int {
    return this.getInt()
}
/**
 * Relative read of the next `Long` from this buffer; delegates directly to [ByteBuffer.getLong],
 * so the value is interpreted using the buffer's current byte order.
 */
internal fun ByteBuffer.getNativeLong(): Long {
    return this.getLong()
}

internal fun ByteBuffer.getCString(byteCount: Int): String {
position(position() + byteCount)
return ""
/**
 * Decode up to [allocatedByteCount] bytes, starting at the current position, as a null-terminated
 * sequence of modified UTF-8 bytes. This reads the same format as the JNI `NewStringUTF` function,
 * but also obeys the null-terminator character used in C.
 *
 * The buffer position is always moved past the whole field: forward by [allocatedByteCount] bytes,
 * or to the buffer's limit when fewer bytes remain. The returned `String` may contain fewer
 * (or even zero) characters. A negative [allocatedByteCount] is treated as zero.
 *
 * This function never throws on malformed input and always returns a `String`:
 * - a byte that cannot start a 1, 2 or 3 byte sequence, or a multi-byte sequence that is cut off
 *   by the end of the field, appends U+FFFD and ends decoding;
 * - a malformed continuation byte appends U+FFFD followed by the character assembled from the
 *   bytes as they were read, and decoding then continues with the next byte.
 *
 * @param allocatedByteCount size in bytes of the fixed-length field that holds the string
 * @return the decoded characters, excluding the null-terminator and anything after it
 */
internal fun ByteBuffer.getCString(allocatedByteCount: Int): String {
    val origin = position()
    // Never read past the limit, and treat a negative count as an empty field instead of
    // failing with a NegativeArraySizeException when the CharArray is allocated.
    val maxBytes = min(allocatedByteCount, remaining()).coerceAtLeast(0)

    // Every decoded char consumes at least one byte (and every branch below writes no more
    // chars than the bytes it consumed), so maxBytes chars is always enough room.
    val chars = CharArray(maxBytes)
    var bytesRead = 0
    var outIndex = 0
    var c = 0

    // fast path for ASCII-7 compatible characters / strings
    while (bytesRead < maxBytes) {
        c = get(origin + bytesRead).toInt() and 0xff
        // 0 is the null-terminator; >= 0x80 means we need to take the "slow" path
        if (c == 0 || c >= 0x80) break

        chars[outIndex++] = c.toChar()
        bytesRead++
    }

    // only continue if the fast path did not stop on the null-terminator
    if (c != 0) {
        decode@ while (bytesRead < maxBytes) {
            c = get(origin + bytesRead).toInt() and 0xff
            if (c == 0) {
                // null-terminator - this is the end of the string
                break
            }

            // the top four bits of the lead byte select the sequence length
            when (c shr 4) {
                in 0..7 -> {
                    /* 0xxxxxxx */
                    bytesRead++
                    chars[outIndex++] = c.toChar()
                }

                12, 13 -> {
                    /* 110x xxxx 10xx xxxx */
                    bytesRead += 2
                    if (bytesRead > maxBytes) {
                        // sequence is cut off by the end of the field: mark it and stop
                        chars[outIndex++] = UTF_REPLACEMENT_CHAR
                        break@decode
                    }

                    val char2 = get(origin + bytesRead - 1).toInt() and 0xff
                    if ((char2 and 0xc0) != 0x80) {
                        // bad continuation byte: mark it, but still emit what the bytes decode to
                        chars[outIndex++] = UTF_REPLACEMENT_CHAR
                    }

                    chars[outIndex++] = (((c and 0x1f) shl 6) or (char2 and 0x3f)).toChar()
                }

                14 -> {
                    /* 1110 xxxx 10xx xxxx 10xx xxxx */
                    bytesRead += 3
                    if (bytesRead > maxBytes) {
                        // sequence is cut off by the end of the field: mark it and stop
                        chars[outIndex++] = UTF_REPLACEMENT_CHAR
                        break@decode
                    }

                    val char2 = get(origin + bytesRead - 2).toInt() and 0xff
                    val char3 = get(origin + bytesRead - 1).toInt() and 0xff
                    if ((char2 and 0xc0) != 0x80 || (char3 and 0xc0) != 0x80) {
                        // bad continuation byte: mark it, but still emit what the bytes decode to
                        chars[outIndex++] = UTF_REPLACEMENT_CHAR
                    }

                    chars[outIndex++] =
                        (((c and 0x0f) shl 12) or ((char2 and 0x3f) shl 6) or (char3 and 0x3f)).toChar()
                }

                else -> {
                    // a stray continuation byte (10xx xxxx) or a 4-byte lead (1111 xxxx), neither
                    // of which is valid here: mark it and return what we *do* have
                    chars[outIndex++] = UTF_REPLACEMENT_CHAR
                    break@decode
                }
            }
        }
    }

    // move the ByteBuffer position to after the string's field, regardless of where decoding ended
    position(origin + maxBytes)
    return String(chars, 0, outIndex)
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,30 @@ internal object NativeEventDecoder {
eventBytes: ByteBuffer
): Event {
eventBytes.order(ByteOrder.nativeOrder())

val header = decodeHeader(eventBytes)
if (header.version != 13) {
throw IllegalArgumentException("Unsupported event version: ${header.version}")
}

if (header.bigEndian == 0) {
eventBytes.order(ByteOrder.BIG_ENDIAN)
}

TODO("To be completed")
}

/**
 * Reads the event header from the current position of [eventBytes]: two native ints
 * (version, then the big-endian flag) followed by a 64-byte C-string field (the OS build).
 * The buffer is advanced past everything that was read.
 */
private fun decodeHeader(eventBytes: ByteBuffer): NativeEventHeader {
    // the fields are laid out back-to-back, so they must be read in exactly this order
    val version = eventBytes.getNativeInt()
    val bigEndian = eventBytes.getNativeInt()
    val osBuild = eventBytes.getCString(64)

    return NativeEventHeader(version, bigEndian, osBuild)
}

/**
 * The header fields read from the start of a native event buffer by `decodeHeader`.
 *
 * @property version first native int in the buffer; `decode` rejects any value other than 13
 * @property bigEndian second native int in the buffer; `decode` switches the buffer to
 * `ByteOrder.BIG_ENDIAN` when this is `0`.
 * NOTE(review): the name suggests the opposite condition - confirm against the native writer
 * @property osBuild string decoded from the 64-byte C-string field that follows the two ints
 */
private data class NativeEventHeader(
val version: Int,
val bigEndian: Int,
val osBuild: String
)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package com.bugsnag.android.ndk

import org.junit.Assert.*
import org.junit.Test
import java.nio.ByteBuffer

/**
 * Tests for `ByteBuffer.getCString`: plain ASCII, empty fields, multi-byte (2 and 3 byte)
 * sequences, and malformed / truncated input.
 */
class CStringDecoderTest {
    @Test
    fun testAscii7Compatible() {
        val buffer = ByteBuffer.wrap(
            byteArrayOf(
                0x63, 0x6f, 0x6d, 0x2e, 0x65, 0x78, 0x61, 0x6d,
                0x70, 0x6c, 0x65, 0x2e, 0x62, 0x75, 0x67, 0x73,
                0x6e, 0x61, 0x67, 0x2e, 0x61, 0x6e, 0x64, 0x72,
                0x6f, 0x69, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00
            )
        )

        assertEquals("com.example.bugsnag.android", buffer.getCString(buffer.remaining()))
    }

    @Test
    fun testEmptyString() {
        // a field that is nothing but padding decodes to "" and is still consumed in full
        val allPadding = ByteBuffer.wrap(ByteArray(16))
        assertEquals("", allPadding.getCString(allPadding.remaining()))
        assertEquals(0, allPadding.remaining())

        // a leading null-terminator hides whatever bytes follow it
        val leadingTerminator = ByteBuffer.wrap(byteArrayOf(0, 0x61, 0x62, 0))
        assertEquals("", leadingTerminator.getCString(leadingTerminator.remaining()))
        assertEquals(0, leadingTerminator.remaining())

        // a zero-length field decodes to "" and does not move the buffer
        val zeroLength = ByteBuffer.wrap(byteArrayOf(0x61, 0x62))
        assertEquals("", zeroLength.getCString(0))
        assertEquals(0, zeroLength.position())
    }

    @Test
    fun testNonAscii7Compatible() {
        val buffer = ByteBuffer.wrap(extendedBytes)
        assertEquals("はい、これは機械翻訳で書かれています", buffer.getCString(buffer.remaining()))
    }

    @Test
    fun testInvalidStrings() {
        // cutting the field one byte short truncates the final 3-byte sequence
        val buffer = ByteBuffer.wrap(extendedBytes)
        assertEquals("はい、これは機械翻訳で書かれていま�", buffer.getCString(extendedBytes.indexOf(0) - 1))

        // overwrite a continuation byte with a space to corrupt a sequence mid-string
        buffer.rewind()
        buffer.put(16, 32)
        assertEquals("はい、これ�㠯機械翻訳で書かれていま�", buffer.getCString(extendedBytes.indexOf(0) - 1))
    }

    @Test
    fun testGreekStrings() {
        val buffer = ByteBuffer.wrap(greekBytes)
        assertEquals("ναι, αυτό γράφτηκε με αυτόματη μετάφραση", buffer.getCString(buffer.remaining()))
    }

    @Test
    fun testInvalidGreekStrings() {
        // cutting the field one byte short truncates the final 2-byte sequence
        val buffer = ByteBuffer.wrap(greekBytes)
        assertEquals("ναι, αυτό γράφτηκε με αυτόματη μετάφρασ�", buffer.getCString(greekBytes.indexOf(0) - 1))

        // overwrite a continuation byte with a space to corrupt a sequence mid-string
        buffer.rewind()
        buffer.put(9, 32)
        assertEquals("ναι, �Πυτό γράφτηκε με αυτόματη μετάφρασ�", buffer.getCString(greekBytes.indexOf(0) - 1))
    }

    // Greek text as 2-byte UTF-8 sequences (plus ASCII spaces and a comma), padded with 16 NUL bytes
    private val greekBytes = byteArrayOf(
        -50, -67, -50, -79, -50, -71, 44, 32,
        -50, -79, -49, -123, -49, -124, -49, -116,
        32, -50, -77, -49, -127, -50, -84, -49,
        -122, -49, -124, -50, -73, -50, -70, -50,
        -75, 32, -50, -68, -50, -75, 32, -50,
        -79, -49, -123, -49, -124, -49, -116, -50,
        -68, -50, -79, -49, -124, -50, -73, 32,
        -50, -68, -50, -75, -49, -124, -50, -84,
        -49, -122, -49, -127, -50, -79, -49, -125,
        -50, -73,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    )

    // Japanese text as 3-byte UTF-8 sequences, padded with 16 NUL bytes
    private val extendedBytes = byteArrayOf(
        -29, -127, -81, -29, -127, -124, -29, -128,
        -127, -29, -127, -109, -29, -126, -116, -29,
        -127, -81, -26, -87, -97, -26, -94, -80,
        -25, -65, -69, -24, -88, -77, -29, -127,
        -89, -26, -101, -72, -29, -127, -117, -29,
        -126, -116, -29, -127, -90, -29, -127, -124,
        -29, -127, -66, -29, -127, -103,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    )
}

0 comments on commit 91d906a

Please sign in to comment.