Skip to content

Commit

Permalink
ICU-21289 Switch to using CharString for calling uloc_canonicalize().
Browse files Browse the repository at this point in the history
  • Loading branch information
roubert committed Sep 25, 2023
1 parent 47b28d6 commit 2df1ab7
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 103 deletions.
51 changes: 12 additions & 39 deletions icu4c/source/common/loclikely.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -625,31 +625,6 @@ _uloc_minimizeSubtags(const char* localeID,
}
}

/**
 * Calls uloc_canonicalize() and folds its status into the return value.
 *
 * @param localeID        Locale identifier forwarded to uloc_canonicalize().
 * @param buffer          Output buffer forwarded to uloc_canonicalize().
 * @param bufferCapacity  Capacity of buffer, forwarded unchanged.
 * @param err             In/out ICU status; written by uloc_canonicalize()
 *                        and left untouched by this wrapper.
 * @return The value returned by uloc_canonicalize(), except that -1 is
 *         returned when *err holds a failure other than
 *         U_BUFFER_OVERFLOW_ERROR.
 */
static int32_t
do_canonicalize(const char* localeID,
                char* buffer,
                int32_t bufferCapacity,
                UErrorCode* err)
{
    const int32_t length =
        uloc_canonicalize(localeID, buffer, bufferCapacity, err);

    // These two statuses are deliberately not mapped to -1: the length is
    // handed back and the caller inspects *err itself.
    const bool lengthStillReported =
        *err == U_STRING_NOT_TERMINATED_WARNING ||
        *err == U_BUFFER_OVERFLOW_ERROR;

    if (!lengthStillReported && U_FAILURE(*err)) {
        return -1;
    }

    return length;
}

U_CAPI int32_t U_EXPORT2
uloc_addLikelySubtags(const char* localeID,
char* maximizedLocaleID,
Expand Down Expand Up @@ -683,14 +658,13 @@ static UBool
_ulocimp_addLikelySubtags(const char* localeID,
icu::ByteSink& sink,
UErrorCode* status) {
PreflightingLocaleIDBuffer localeBuffer;
do {
localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
localeBuffer.getCapacity(), status);
} while (localeBuffer.needToTryAgain(status));

icu::CharString localeBuffer;
{
icu::CharStringByteSink localeSink(&localeBuffer);
ulocimp_canonicalize(localeID, localeSink, status);
}
if (U_SUCCESS(*status)) {
return _uloc_addLikelySubtags(localeBuffer.getBuffer(), sink, status);
return _uloc_addLikelySubtags(localeBuffer.data(), sink, status);
} else {
return false;
}
Expand Down Expand Up @@ -737,13 +711,12 @@ ulocimp_minimizeSubtags(const char* localeID,
icu::ByteSink& sink,
bool favorScript,
UErrorCode* status) {
PreflightingLocaleIDBuffer localeBuffer;
do {
localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
localeBuffer.getCapacity(), status);
} while (localeBuffer.needToTryAgain(status));

_uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, favorScript, status);
icu::CharString localeBuffer;
{
icu::CharStringByteSink localeSink(&localeBuffer);
ulocimp_canonicalize(localeID, localeSink, status);
}
_uloc_minimizeSubtags(localeBuffer.data(), sink, favorScript, status);
}

// Pairs of (language subtag, + or -) for finding out fast if common languages
Expand Down
14 changes: 9 additions & 5 deletions icu4c/source/common/locresdata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
#include "unicode/putil.h"
#include "unicode/uloc.h"
#include "unicode/ures.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cstring.h"
#include "ulocimp.h"
#include "uresimp.h"
Expand Down Expand Up @@ -156,16 +158,18 @@ _uloc_getOrientationHelper(const char* localeId,
ULayoutType result = ULOC_LAYOUT_UNKNOWN;

if (!U_FAILURE(*status)) {
int32_t length = 0;
char localeBuffer[ULOC_FULLNAME_CAPACITY];

uloc_canonicalize(localeId, localeBuffer, sizeof(localeBuffer), status);
icu::CharString localeBuffer;
{
icu::CharStringByteSink sink(&localeBuffer);
ulocimp_canonicalize(localeId, sink, status);
}

if (!U_FAILURE(*status)) {
int32_t length = 0;
const char16_t* const value =
uloc_getTableStringWithFallback(
nullptr,
localeBuffer,
localeBuffer.data(),
"layout",
nullptr,
key,
Expand Down
49 changes: 7 additions & 42 deletions icu4c/source/common/uloc_tag.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2625,53 +2625,18 @@ ulocimp_toLanguageTag(const char* localeID,
UBool strict,
UErrorCode* status) {
icu::CharString canonical;
int32_t reslen;
UErrorCode tmpStatus = U_ZERO_ERROR;
UBool hadPosix = false;
const char* pKeywordStart;

/* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID));
if (resultCapacity > 0) {
char* buffer;

for (;;) {
buffer = canonical.getAppendBuffer(
/*minCapacity=*/resultCapacity,
/*desiredCapacityHint=*/resultCapacity,
resultCapacity,
tmpStatus);

if (U_FAILURE(tmpStatus)) {
*status = tmpStatus;
return;
}

reslen =
uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);

if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
break;
}

resultCapacity = reslen;
tmpStatus = U_ZERO_ERROR;
}

if (U_FAILURE(tmpStatus)) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}

canonical.append(buffer, reslen, tmpStatus);
if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
}

if (U_FAILURE(tmpStatus)) {
*status = tmpStatus;
return;
}
{
icu::CharStringByteSink canonicalSink(&canonical);
ulocimp_canonicalize(localeID, canonicalSink, &tmpStatus);
}
if (U_FAILURE(tmpStatus)) {
*status = tmpStatus;
return;
}

/* For handling special case - private use only tag */
Expand Down
14 changes: 7 additions & 7 deletions icu4c/source/i18n/calendar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,23 +255,23 @@ static ECalType getCalendarTypeForLocale(const char *locid) {
UErrorCode status = U_ZERO_ERROR;
ECalType calType = CALTYPE_UNKNOWN;

//TODO: ULOC_FULL_NAME is out of date and too small..
char canonicalName[256];

// Canonicalize, so that an old-style variant will be transformed to keywords.
// e.g. ja_JP_TRADITIONAL -> ja_JP@calendar=japanese
// NOTE: Since ICU-20187, ja_JP_TRADITIONAL no longer canonicalizes, and
// the Gregorian calendar is returned instead.
int32_t canonicalLen = uloc_canonicalize(locid, canonicalName, sizeof(canonicalName) - 1, &status);
CharString canonicalName;
{
CharStringByteSink sink(&canonicalName);
ulocimp_canonicalize(locid, sink, &status);
}
if (U_FAILURE(status)) {
return CALTYPE_GREGORIAN;
}
canonicalName[canonicalLen] = 0; // terminate

CharString calTypeBuf;
{
CharStringByteSink sink(&calTypeBuf);
ulocimp_getKeywordValue(canonicalName, "calendar", sink, &status);
ulocimp_getKeywordValue(canonicalName.data(), "calendar", sink, &status);
}
if (U_SUCCESS(status)) {
calType = getCalendarType(calTypeBuf.data());
Expand All @@ -284,7 +284,7 @@ static ECalType getCalendarTypeForLocale(const char *locid) {
// when calendar keyword is not available or not supported, read supplementalData
// to get the default calendar type for the locale's region
char region[ULOC_COUNTRY_CAPACITY];
(void)ulocimp_getRegionForSupplementalData(canonicalName, true, region, sizeof(region), &status);
(void)ulocimp_getRegionForSupplementalData(canonicalName.data(), true, region, sizeof(region), &status);
if (U_FAILURE(status)) {
return CALTYPE_GREGORIAN;
}
Expand Down
23 changes: 13 additions & 10 deletions icu4c/source/i18n/ucol_sit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ static const char providerKeyword[] = "@sp=";
static const int32_t locElementCount = UCOL_SIT_LOCELEMENT_MAX+1;
static const int32_t locElementCapacity = 32;
static const int32_t loc3066Capacity = 256;
static const int32_t internalBufferSize = 512;

/* structure containing specification of a collator. Initialized
* from a short string. Also used to construct a short string from a
Expand Down Expand Up @@ -452,19 +451,21 @@ ucol_prepareShortStringOpen( const char *definition,
ucol_sit_readSpecs(&s, definition, parseError, status);
ucol_sit_calculateWholeLocale(&s, *status);

char buffer[internalBufferSize];
uprv_memset(buffer, 0, internalBufferSize);
uloc_canonicalize(s.locale.data(), buffer, internalBufferSize, status);
CharString buffer;
{
CharStringByteSink sink(&buffer);
ulocimp_canonicalize(s.locale.data(), sink, status);
}

UResourceBundle *b = ures_open(U_ICUDATA_COLL, buffer, status);
UResourceBundle *b = ures_open(U_ICUDATA_COLL, buffer.data(), status);
/* we try to find stuff from keyword */
UResourceBundle *collations = ures_getByKey(b, "collations", nullptr, status);
UResourceBundle *collElem = nullptr;
CharString keyBuffer;
{
// if there is a keyword, we pick it up and try to get elements
CharStringByteSink sink(&keyBuffer);
ulocimp_getKeywordValue(buffer, "collation", sink, status);
ulocimp_getKeywordValue(buffer.data(), "collation", sink, status);
}
if(keyBuffer.isEmpty()) {
// no keyword
Expand Down Expand Up @@ -519,14 +520,16 @@ ucol_openFromShortString( const char *definition,
string = ucol_sit_readSpecs(&s, definition, parseError, status);
ucol_sit_calculateWholeLocale(&s, *status);

char buffer[internalBufferSize];
uprv_memset(buffer, 0, internalBufferSize);
#ifdef UCOL_TRACE_SIT
fprintf(stderr, "DEF %s, DATA %s, ERR %s\n", definition, s.locale.data(), u_errorName(*status));
#endif
uloc_canonicalize(s.locale.data(), buffer, internalBufferSize, status);
CharString buffer;
{
CharStringByteSink sink(&buffer);
ulocimp_canonicalize(s.locale.data(), sink, status);
}

UCollator *result = ucol_open(buffer, status);
UCollator *result = ucol_open(buffer.data(), status);
int32_t i = 0;

for(i = 0; i < UCOL_ATTRIBUTE_COUNT; i++) {
Expand Down

0 comments on commit 2df1ab7

Please sign in to comment.