From 2df1ab724040dc9cb25a4df7f0244cfe79ffef62 Mon Sep 17 00:00:00 2001 From: Fredrik Roubert Date: Mon, 25 Sep 2023 19:32:34 +0200 Subject: [PATCH] ICU-21289 Switch to using CharString for calling uloc_canonicalize(). --- icu4c/source/common/loclikely.cpp | 51 +++++++----------------------- icu4c/source/common/locresdata.cpp | 14 +++++--- icu4c/source/common/uloc_tag.cpp | 49 ++++------------------------ icu4c/source/i18n/calendar.cpp | 14 ++++---- icu4c/source/i18n/ucol_sit.cpp | 23 ++++++++------ 5 files changed, 48 insertions(+), 103 deletions(-) diff --git a/icu4c/source/common/loclikely.cpp b/icu4c/source/common/loclikely.cpp index 99551e6cf1e8..eedfb8149e26 100644 --- a/icu4c/source/common/loclikely.cpp +++ b/icu4c/source/common/loclikely.cpp @@ -625,31 +625,6 @@ _uloc_minimizeSubtags(const char* localeID, } } -static int32_t -do_canonicalize(const char* localeID, - char* buffer, - int32_t bufferCapacity, - UErrorCode* err) -{ - int32_t canonicalizedSize = uloc_canonicalize( - localeID, - buffer, - bufferCapacity, - err); - - if (*err == U_STRING_NOT_TERMINATED_WARNING || - *err == U_BUFFER_OVERFLOW_ERROR) { - return canonicalizedSize; - } - else if (U_FAILURE(*err)) { - - return -1; - } - else { - return canonicalizedSize; - } -} - U_CAPI int32_t U_EXPORT2 uloc_addLikelySubtags(const char* localeID, char* maximizedLocaleID, @@ -683,14 +658,13 @@ static UBool _ulocimp_addLikelySubtags(const char* localeID, icu::ByteSink& sink, UErrorCode* status) { - PreflightingLocaleIDBuffer localeBuffer; - do { - localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(), - localeBuffer.getCapacity(), status); - } while (localeBuffer.needToTryAgain(status)); - + icu::CharString localeBuffer; + { + icu::CharStringByteSink localeSink(&localeBuffer); + ulocimp_canonicalize(localeID, localeSink, status); + } if (U_SUCCESS(*status)) { - return _uloc_addLikelySubtags(localeBuffer.getBuffer(), sink, status); + return 
_uloc_addLikelySubtags(localeBuffer.data(), sink, status); } else { return false; } @@ -737,13 +711,12 @@ ulocimp_minimizeSubtags(const char* localeID, icu::ByteSink& sink, bool favorScript, UErrorCode* status) { - PreflightingLocaleIDBuffer localeBuffer; - do { - localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(), - localeBuffer.getCapacity(), status); - } while (localeBuffer.needToTryAgain(status)); - - _uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, favorScript, status); + icu::CharString localeBuffer; + { + icu::CharStringByteSink localeSink(&localeBuffer); + ulocimp_canonicalize(localeID, localeSink, status); + } + _uloc_minimizeSubtags(localeBuffer.data(), sink, favorScript, status); } // Pairs of (language subtag, + or -) for finding out fast if common languages diff --git a/icu4c/source/common/locresdata.cpp b/icu4c/source/common/locresdata.cpp index 7a0969dff583..c9d1cdddde44 100644 --- a/icu4c/source/common/locresdata.cpp +++ b/icu4c/source/common/locresdata.cpp @@ -24,6 +24,8 @@ #include "unicode/putil.h" #include "unicode/uloc.h" #include "unicode/ures.h" +#include "bytesinkutil.h" +#include "charstr.h" #include "cstring.h" #include "ulocimp.h" #include "uresimp.h" @@ -156,16 +158,18 @@ _uloc_getOrientationHelper(const char* localeId, ULayoutType result = ULOC_LAYOUT_UNKNOWN; if (!U_FAILURE(*status)) { - int32_t length = 0; - char localeBuffer[ULOC_FULLNAME_CAPACITY]; - - uloc_canonicalize(localeId, localeBuffer, sizeof(localeBuffer), status); + icu::CharString localeBuffer; + { + icu::CharStringByteSink sink(&localeBuffer); + ulocimp_canonicalize(localeId, sink, status); + } if (!U_FAILURE(*status)) { + int32_t length = 0; const char16_t* const value = uloc_getTableStringWithFallback( nullptr, - localeBuffer, + localeBuffer.data(), "layout", nullptr, key, diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp index 4d5c1f8db056..fe3261c75d93 100644 --- a/icu4c/source/common/uloc_tag.cpp 
+++ b/icu4c/source/common/uloc_tag.cpp @@ -2625,53 +2625,18 @@ ulocimp_toLanguageTag(const char* localeID, UBool strict, UErrorCode* status) { icu::CharString canonical; - int32_t reslen; UErrorCode tmpStatus = U_ZERO_ERROR; UBool hadPosix = false; const char* pKeywordStart; /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ - int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID)); - if (resultCapacity > 0) { - char* buffer; - - for (;;) { - buffer = canonical.getAppendBuffer( - /*minCapacity=*/resultCapacity, - /*desiredCapacityHint=*/resultCapacity, - resultCapacity, - tmpStatus); - - if (U_FAILURE(tmpStatus)) { - *status = tmpStatus; - return; - } - - reslen = - uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus); - - if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) { - break; - } - - resultCapacity = reslen; - tmpStatus = U_ZERO_ERROR; - } - - if (U_FAILURE(tmpStatus)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - canonical.append(buffer, reslen, tmpStatus); - if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString. - } - - if (U_FAILURE(tmpStatus)) { - *status = tmpStatus; - return; - } + { + icu::CharStringByteSink canonicalSink(&canonical); + ulocimp_canonicalize(localeID, canonicalSink, &tmpStatus); + } + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + return; } /* For handling special case - private use only tag */ diff --git a/icu4c/source/i18n/calendar.cpp b/icu4c/source/i18n/calendar.cpp index 5612fe9acc3b..742891eb9284 100644 --- a/icu4c/source/i18n/calendar.cpp +++ b/icu4c/source/i18n/calendar.cpp @@ -255,23 +255,23 @@ static ECalType getCalendarTypeForLocale(const char *locid) { UErrorCode status = U_ZERO_ERROR; ECalType calType = CALTYPE_UNKNOWN; - //TODO: ULOC_FULL_NAME is out of date and too small.. - char canonicalName[256]; - // Canonicalize, so that an old-style variant will be transformed to keywords.
// e.g ja_JP_TRADITIONAL -> ja_JP@calendar=japanese // NOTE: Since ICU-20187, ja_JP_TRADITIONAL no longer canonicalizes, and // the Gregorian calendar is returned instead. - int32_t canonicalLen = uloc_canonicalize(locid, canonicalName, sizeof(canonicalName) - 1, &status); + CharString canonicalName; + { + CharStringByteSink sink(&canonicalName); + ulocimp_canonicalize(locid, sink, &status); + } if (U_FAILURE(status)) { return CALTYPE_GREGORIAN; } - canonicalName[canonicalLen] = 0; // terminate CharString calTypeBuf; { CharStringByteSink sink(&calTypeBuf); - ulocimp_getKeywordValue(canonicalName, "calendar", sink, &status); + ulocimp_getKeywordValue(canonicalName.data(), "calendar", sink, &status); } if (U_SUCCESS(status)) { calType = getCalendarType(calTypeBuf.data()); @@ -284,7 +284,7 @@ static ECalType getCalendarTypeForLocale(const char *locid) { // when calendar keyword is not available or not supported, read supplementalData // to get the default calendar type for the locale's region char region[ULOC_COUNTRY_CAPACITY]; - (void)ulocimp_getRegionForSupplementalData(canonicalName, true, region, sizeof(region), &status); + (void)ulocimp_getRegionForSupplementalData(canonicalName.data(), true, region, sizeof(region), &status); if (U_FAILURE(status)) { return CALTYPE_GREGORIAN; } diff --git a/icu4c/source/i18n/ucol_sit.cpp b/icu4c/source/i18n/ucol_sit.cpp index 2cbe54332d2e..2e5bce2cbaaf 100644 --- a/icu4c/source/i18n/ucol_sit.cpp +++ b/icu4c/source/i18n/ucol_sit.cpp @@ -88,7 +88,6 @@ static const char providerKeyword[] = "@sp="; static const int32_t locElementCount = UCOL_SIT_LOCELEMENT_MAX+1; static const int32_t locElementCapacity = 32; static const int32_t loc3066Capacity = 256; -static const int32_t internalBufferSize = 512; /* structure containing specification of a collator. Initialized * from a short string. 
Also used to construct a short string from a @@ -452,11 +451,13 @@ ucol_prepareShortStringOpen( const char *definition, ucol_sit_readSpecs(&s, definition, parseError, status); ucol_sit_calculateWholeLocale(&s, *status); - char buffer[internalBufferSize]; - uprv_memset(buffer, 0, internalBufferSize); - uloc_canonicalize(s.locale.data(), buffer, internalBufferSize, status); + CharString buffer; + { + CharStringByteSink sink(&buffer); + ulocimp_canonicalize(s.locale.data(), sink, status); + } - UResourceBundle *b = ures_open(U_ICUDATA_COLL, buffer, status); + UResourceBundle *b = ures_open(U_ICUDATA_COLL, buffer.data(), status); /* we try to find stuff from keyword */ UResourceBundle *collations = ures_getByKey(b, "collations", nullptr, status); UResourceBundle *collElem = nullptr; @@ -464,7 +465,7 @@ ucol_prepareShortStringOpen( const char *definition, { // if there is a keyword, we pick it up and try to get elements CharStringByteSink sink(&keyBuffer); - ulocimp_getKeywordValue(buffer, "collation", sink, status); + ulocimp_getKeywordValue(buffer.data(), "collation", sink, status); } if(keyBuffer.isEmpty()) { // no keyword @@ -519,14 +520,16 @@ ucol_openFromShortString( const char *definition, string = ucol_sit_readSpecs(&s, definition, parseError, status); ucol_sit_calculateWholeLocale(&s, *status); - char buffer[internalBufferSize]; - uprv_memset(buffer, 0, internalBufferSize); #ifdef UCOL_TRACE_SIT fprintf(stderr, "DEF %s, DATA %s, ERR %s\n", definition, s.locale.data(), u_errorName(*status)); #endif - uloc_canonicalize(s.locale.data(), buffer, internalBufferSize, status); + CharString buffer; + { + CharStringByteSink sink(&buffer); + ulocimp_canonicalize(s.locale.data(), sink, status); + } - UCollator *result = ucol_open(buffer, status); + UCollator *result = ucol_open(buffer.data(), status); int32_t i = 0; for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {