diff --git a/icu4c/source/common/ucnv_u8.cpp b/icu4c/source/common/ucnv_u8.cpp index 951988ed9ca3..5d72f8ef377b 100644 --- a/icu4c/source/common/ucnv_u8.cpp +++ b/icu4c/source/common/ucnv_u8.cpp @@ -28,6 +28,7 @@ #include "unicode/utf.h" #include "unicode/utf8.h" #include "unicode/utf16.h" +#include "uassert.h" #include "ucnv_bld.h" #include "ucnv_cnv.h" #include "cmemory.h" @@ -694,7 +695,9 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, // Use a single counter for source and target, counting the minimum of // the source length and the target capacity. // Let the standard converter handle edge cases. + const uint8_t *limit=sourceLimit; if(count>targetCapacity) { + limit-=(count-targetCapacity); count=targetCapacity; } @@ -707,11 +710,11 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, // sequence from the previous buffer. int32_t length=count-toULimit; if(length>0) { - uint8_t b1=*(sourceLimit-1); + uint8_t b1=*(limit-1); if(U8_IS_SINGLE(b1)) { // common ASCII character } else if(U8_IS_TRAIL(b1) && length>=2) { - uint8_t b2=*(sourceLimit-2); + uint8_t b2=*(limit-2); if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { // truncated 3-byte sequence count-=2; @@ -811,7 +814,7 @@ ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, } /* copy the legal byte sequence to the target */ - { + if(count>=toULength) { int8_t i; for(i=0; isource=(char *)source; + pFromUArgs->target=(char *)target; + *pErrorCode=U_USING_DEFAULT_WARNING; + return; } } } + U_ASSERT(count>=0); if(U_SUCCESS(*pErrorCode) && sourcetargetLimit) { diff --git a/icu4c/source/test/intltest/convtest.cpp b/icu4c/source/test/intltest/convtest.cpp index 6286ff54f32d..db0aa86912bc 100644 --- a/icu4c/source/test/intltest/convtest.cpp +++ b/icu4c/source/test/intltest/convtest.cpp @@ -68,21 +68,16 @@ ConversionTest::~ConversionTest() { void ConversionTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) { if (exec) logln("TestSuite ConversionTest: "); - switch (index) { + TESTCASE_AUTO_BEGIN; #if !UCONFIG_NO_FILE_IO - case 0: name="TestToUnicode"; if (exec) TestToUnicode(); break; - case 1: name="TestFromUnicode"; if (exec) TestFromUnicode(); break; - case 2: name="TestGetUnicodeSet"; if (exec) TestGetUnicodeSet(); break; - case 3: name="TestDefaultIgnorableCallback"; if (exec) TestDefaultIgnorableCallback(); break; -#else - case 0: - case 1: - case 2: - case 3: name="skip"; break; + TESTCASE_AUTO(TestToUnicode); + TESTCASE_AUTO(TestFromUnicode); + TESTCASE_AUTO(TestGetUnicodeSet); #endif - case 4: name="TestGetUnicodeSet2"; if (exec) TestGetUnicodeSet2(); break; - default: name=""; break; //needed to end loop - } + TESTCASE_AUTO(TestGetUnicodeSet2); + TESTCASE_AUTO(TestDefaultIgnorableCallback); + TESTCASE_AUTO(TestUTF8ToUTF8Overflow); + TESTCASE_AUTO_END; } // test data interface ----------------------------------------------------- *** @@ -723,6 +718,80 @@ ConversionTest::TestDefaultIgnorableCallback() { delete set_ignorable; } +void +ConversionTest::TestUTF8ToUTF8Overflow() { + IcuTestErrorCode errorCode(*this, "TestUTF8ToUTF8Overflow"); + LocalUConverterPointer cnv1(ucnv_open("UTF-8", errorCode)); + LocalUConverterPointer cnv2(ucnv_open("UTF-8", errorCode)); + static const char *text = "aä"; // ä: 2 bytes + const char *source = text; + const char *sourceLimit = text + strlen(text); + char result[20]; + char *target = result; + const char *targetLimit = result + sizeof(result); + UChar buffer16[20]; + UChar *pivotSource = buffer16; + UChar *pivotTarget = buffer16; + const UChar *pivotLimit = buffer16 + UPRV_LENGTHOF(buffer16); + + // Convert with insufficient target capacity. + result[2] = 5; + ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), + &target, result + 2, &source, sourceLimit, + buffer16, &pivotSource, &pivotTarget, pivotLimit, + FALSE, FALSE, errorCode); + assertEquals("overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset()); + int32_t length = (int32_t)(target - result); + assertEquals("number of bytes written", 2, length); + assertEquals("next byte not clobbered", 5, result[2]); + + // Convert the rest and flush. + ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), + &target, targetLimit, &source, sourceLimit, + buffer16, &pivotSource, &pivotTarget, pivotLimit, + FALSE, TRUE, errorCode); + + assertSuccess("UTF-8->UTF-8", errorCode); + length = (int32_t)(target - result); + assertEquals("3 bytes", 3, length); + if (length == 3) { + assertTrue("result same as input", memcmp(text, result, length) == 0); + } + + ucnv_reset(cnv1.getAlias()); + ucnv_reset(cnv2.getAlias()); + memset(result, 0, sizeof(result)); + static const char *text2 = "a🚲"; // U+1F6B2 bicycle: 4 bytes + source = text2; + sourceLimit = text2 + strlen(text2); + target = result; + pivotSource = pivotTarget = buffer16; + + // Convert with insufficient target capacity. + result[3] = 5; + ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), + &target, result + 3, &source, sourceLimit, + buffer16, &pivotSource, &pivotTarget, pivotLimit, + FALSE, FALSE, errorCode); + assertEquals("text2 overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset()); + length = (int32_t)(target - result); + assertEquals("text2 number of bytes written", 3, length); + assertEquals("text2 next byte not clobbered", 5, result[3]); + + // Convert the rest and flush. + ucnv_convertEx(cnv2.getAlias(), cnv1.getAlias(), + &target, targetLimit, &source, sourceLimit, + buffer16, &pivotSource, &pivotTarget, pivotLimit, + FALSE, TRUE, errorCode); + + assertSuccess("text2 UTF-8->UTF-8", errorCode); + length = (int32_t)(target - result); + assertEquals("text2 5 bytes", 5, length); + if (length == 5) { + assertTrue("text2 result same as input", memcmp(text2, result, length) == 0); + } +} + // open testdata or ICU data converter ------------------------------------- *** UConverter * diff --git a/icu4c/source/test/intltest/convtest.h b/icu4c/source/test/intltest/convtest.h index c2d37e48974a..84a3a89a5033 100644 --- a/icu4c/source/test/intltest/convtest.h +++ b/icu4c/source/test/intltest/convtest.h @@ -76,6 +76,7 @@ class ConversionTest : public IntlTest { void TestGetUnicodeSet(); void TestGetUnicodeSet2(); void TestDefaultIgnorableCallback(); + void TestUTF8ToUTF8Overflow(); private: UBool