-
Notifications
You must be signed in to change notification settings - Fork 207
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(ndk): add support for reading modified-utf8 sequences from `ByteBuffers`
- Loading branch information
Showing
3 changed files
with
203 additions
and
3 deletions.
There are no files selected for viewing
101 changes: 98 additions & 3 deletions
101
bugsnag-plugin-android-ndk/src/main/java/com/bugsnag/android/ndk/ByteBufferExtensions.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,106 @@ | ||
package com.bugsnag.android.ndk | ||
|
||
import java.nio.ByteBuffer | ||
import kotlin.math.min | ||
|
||
private const val UTF_REPLACEMENT_CHAR = '\uFFFD' | ||
|
||
/**
 * Reads the next four bytes from this buffer's current position as an `Int`, advancing the
 * position past them. The bytes are composed using the byte order the buffer is currently
 * configured with.
 *
 * NOTE(review): the name suggests the platform's native byte order, but no order is selected
 * here - presumably the caller configures the buffer beforehand; confirm against call sites.
 */
internal fun ByteBuffer.getNativeInt(): Int {
    return this.getInt()
}
/**
 * Reads the next eight bytes from this buffer's current position as a `Long`, advancing the
 * position past them. The bytes are composed using the byte order the buffer is currently
 * configured with.
 *
 * NOTE(review): the name suggests the platform's native byte order, but no order is selected
 * here - presumably the caller configures the buffer beforehand; confirm against call sites.
 */
internal fun ByteBuffer.getNativeLong(): Long {
    return this.getLong()
}
|
||
internal fun ByteBuffer.getCString(byteCount: Int): String { | ||
position(position() + byteCount) | ||
return "" | ||
/**
 * Decode up to [allocatedByteCount] bytes as a null-terminated sequence of modified UTF-8 bytes.
 * This reads the same format as the JNI `NewStringUTF` function, but also obeys the
 * null-terminator character used in C.
 *
 * The position of this `ByteBuffer` is always advanced over the whole allocated region, that is
 * by `min(allocatedByteCount, remaining())` bytes, no matter where the string itself ends. A
 * negative [allocatedByteCount] is treated as zero. The returned `String` may contain fewer
 * characters than the number of bytes consumed (or even zero characters).
 *
 * Malformed input never raises an error, a `String` is always returned:
 * - a lead byte that cannot start a 1, 2 or 3 byte sequence, or a sequence that is cut short by
 *   the end of the region, appends `U+FFFD` and stops decoding
 * - a sequence with a malformed continuation byte appends `U+FFFD` followed by the best-effort
 *   decoded character, and decoding carries on with the following bytes
 *
 * @param allocatedByteCount the size in bytes of the region holding the string and its padding
 * @return the characters decoded before the null-terminator, end of region or decoding stop
 */
internal fun ByteBuffer.getCString(allocatedByteCount: Int): String {
    val origin = position()
    // never read past the end of the buffer, and treat a negative count as an empty region
    // (CharArray would otherwise throw NegativeArraySizeException)
    val maxBytes = min(allocatedByteCount, remaining()).coerceAtLeast(0)

    // allocate a CharArray to handle the decoded string
    // no sequence produces more chars than the bytes it occupies, so it can't be longer than
    // the number of bytes in the region
    val chars = CharArray(maxBytes)
    var bytesRead = 0
    var outIndex = 0
    var c = 0

    // fast path for ASCII-7 compatible characters / strings
    while (bytesRead < maxBytes) {
        c = get(origin + bytesRead).toInt() and 0xff
        // stop on the null-terminator (end of string) or on the first byte that needs the
        // "slow" multi-byte path
        if (c == 0 || c >= 0x80) break

        chars[outIndex++] = c.toChar()
        bytesRead++
    }

    // c is only zero here if the terminator was reached (or the region is empty), in which case
    // there is nothing left to decode
    if (c != 0) {
        while (bytesRead < maxBytes) {
            c = get(origin + bytesRead).toInt() and 0xff
            if (c == 0) {
                // null-terminator - this is the end of the string
                break
            }

            // the high nibble of the lead byte identifies the length of the sequence
            when (c shr 4) {
                in 0..7 -> {
                    /* 0xxx xxxx */
                    bytesRead++
                    chars[outIndex++] = c.toChar()
                }

                12, 13 -> {
                    /* 110x xxxx   10xx xxxx */
                    bytesRead += 2
                    if (bytesRead > maxBytes) {
                        // sequence is cut short by the end of the region - stop decoding here
                        chars[outIndex++] = UTF_REPLACEMENT_CHAR
                        break
                    }

                    val char2 = get(origin + bytesRead - 1).toInt() and 0xff
                    if ((char2 and 0xc0) != 0x80) {
                        // malformed continuation byte - flag it, but still emit the
                        // best-effort character below and keep decoding
                        chars[outIndex++] = UTF_REPLACEMENT_CHAR
                    }

                    chars[outIndex++] = (((c and 0x1f) shl 6) or (char2 and 0x3f)).toChar()
                }

                14 -> {
                    /* 1110 xxxx   10xx xxxx   10xx xxxx */
                    bytesRead += 3
                    if (bytesRead > maxBytes) {
                        // sequence is cut short by the end of the region - stop decoding here
                        chars[outIndex++] = UTF_REPLACEMENT_CHAR
                        break
                    }

                    val char2 = get(origin + bytesRead - 2).toInt() and 0xff
                    val char3 = get(origin + bytesRead - 1).toInt() and 0xff
                    if ((char2 and 0xc0) != 0x80 || (char3 and 0xc0) != 0x80) {
                        // malformed continuation byte - flag it, but still emit the
                        // best-effort character below and keep decoding
                        chars[outIndex++] = UTF_REPLACEMENT_CHAR
                    }

                    chars[outIndex++] =
                        (((c and 0x0f) shl 12) or ((char2 and 0x3f) shl 6) or (char3 and 0x3f)).toChar()
                }

                else -> {
                    // 10xx xxxx (stray continuation byte) or 1111 xxxx (not valid in modified
                    // UTF-8) - we don't error out, we return what we *do* have
                    chars[outIndex++] = UTF_REPLACEMENT_CHAR
                    break
                }
            }
        }
    }

    // move the ByteBuffer position to after the allocated region, not just after the string
    position(origin + maxBytes)
    return String(chars, 0, outIndex)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
81 changes: 81 additions & 0 deletions
81
bugsnag-plugin-android-ndk/src/test/java/com/bugsnag/android/ndk/CStringDecoderTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package com.bugsnag.android.ndk | ||
|
||
import org.junit.Assert.assertEquals | ||
import org.junit.Test | ||
import java.nio.ByteBuffer | ||
|
||
/**
 * Tests for `ByteBuffer.getCString`, covering plain ASCII, empty strings, multi-byte sequences
 * and the output produced for truncated or corrupted sequences.
 */
class CStringDecoderTest {
    @Test
    fun testAscii7Compatible() {
        val buffer = ByteBuffer.wrap(
            byteArrayOf(
                0x63, 0x6f, 0x6d, 0x2e, 0x65, 0x78, 0x61, 0x6d,
                0x70, 0x6c, 0x65, 0x2e, 0x62, 0x75, 0x67, 0x73,
                0x6e, 0x61, 0x67, 0x2e, 0x61, 0x6e, 0x64, 0x72,
                0x6f, 0x69, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00
            )
        )

        assertEquals("com.example.bugsnag.android", buffer.getCString(buffer.remaining()))
    }

    @Test
    fun testEmptyString() {
        // a region that starts with the null-terminator decodes to an empty string,
        // but the whole region is still consumed
        val zeroed = ByteBuffer.wrap(ByteArray(8))
        assertEquals("", zeroed.getCString(zeroed.remaining()))
        assertEquals(0, zeroed.remaining())

        // an empty buffer has nothing to decode
        val empty = ByteBuffer.allocate(0)
        assertEquals("", empty.getCString(0))

        // a zero-length region yields an empty string and leaves the position untouched
        val text = ByteBuffer.wrap(byteArrayOf(0x61, 0x62))
        assertEquals("", text.getCString(0))
        assertEquals(0, text.position())
    }

    @Test
    fun testNonAscii7Compatible() {
        val buffer = ByteBuffer.wrap(extendedBytes)
        assertEquals("はい、これは機械翻訳で書かれています", buffer.getCString(buffer.remaining()))
    }

    @Test
    fun testInvalidStrings() {
        // cut the region one byte short, so that the final 3-byte sequence is truncated
        val buffer = ByteBuffer.wrap(extendedBytes)
        assertEquals("はい、これは機械翻訳で書かれていま�", buffer.getCString(extendedBytes.indexOf(0) - 1))

        // corrupt a continuation byte in the middle of the string as well
        // (the buffer wraps extendedBytes, so this writes through to the array)
        buffer.rewind()
        buffer.put(16, 32)
        assertEquals("はい、これ�㠯機械翻訳で書かれていま�", buffer.getCString(extendedBytes.indexOf(0) - 1))
    }

    @Test
    fun testGreekStrings() {
        val buffer = ByteBuffer.wrap(greekBytes)
        assertEquals("ναι, αυτό γράφτηκε με αυτόματη μετάφραση", buffer.getCString(buffer.remaining()))
    }

    @Test
    fun testInvalidGreekStrings() {
        // cut the region one byte short, so that the final 2-byte sequence is truncated
        val buffer = ByteBuffer.wrap(greekBytes)
        assertEquals("ναι, αυτό γράφτηκε με αυτόματη μετάφρασ�", buffer.getCString(greekBytes.indexOf(0) - 1))

        // corrupt a continuation byte in the middle of the string as well
        // (the buffer wraps greekBytes, so this writes through to the array)
        buffer.rewind()
        buffer.put(9, 32)
        assertEquals("ναι, �Πυτό γράφτηκε με αυτόματη μετάφρασ�", buffer.getCString(greekBytes.indexOf(0) - 1))
    }

    // "ναι, αυτό γράφτηκε με αυτόματη μετάφραση" as UTF-8, followed by zero padding
    private val greekBytes = byteArrayOf(
        -50, -67, -50, -79, -50, -71, 44, 32,
        -50, -79, -49, -123, -49, -124, -49, -116,
        32, -50, -77, -49, -127, -50, -84, -49,
        -122, -49, -124, -50, -73, -50, -70, -50,
        -75, 32, -50, -68, -50, -75, 32, -50,
        -79, -49, -123, -49, -124, -49, -116, -50,
        -68, -50, -79, -49, -124, -50, -73, 32,
        -50, -68, -50, -75, -49, -124, -50, -84,
        -49, -122, -49, -127, -50, -79, -49, -125,
        -50, -73,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    )

    // "はい、これは機械翻訳で書かれています" as UTF-8, followed by zero padding
    private val extendedBytes = byteArrayOf(
        -29, -127, -81, -29, -127, -124, -29, -128,
        -127, -29, -127, -109, -29, -126, -116, -29,
        -127, -81, -26, -87, -97, -26, -94, -80,
        -25, -65, -69, -24, -88, -77, -29, -127,
        -89, -26, -101, -72, -29, -127, -117, -29,
        -126, -116, -29, -127, -90, -29, -127, -124,
        -29, -127, -66, -29, -127, -103,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    )
}