Skip to content

Commit

Permalink
ICU-21289 Switch to using CharString for calling uloc_forLanguageTag().
Browse files Browse the repository at this point in the history
  • Loading branch information
roubert committed Sep 26, 2023
1 parent 99026f0 commit 96dcaf7
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 110 deletions.
51 changes: 16 additions & 35 deletions icu4c/source/common/uloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,25 +477,6 @@ static const CanonicalizationMap CANONICALIZE_MAP[] = {
/* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id looks like a BCP47 language tag with an extension:
 * it is non-null, contains no '@', and its shortest subtag (as reported by
 * getShortestSubtagLength()) is exactly one character long.
 *
 * The argument is evaluated more than once, so it must not have side effects.
 * The expansion refers only to the macro parameter; it does not depend on a
 * variable named `localeID` being in scope at the call site.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == nullptr && getShortestSubtagLength(id) == 1)
/*
 * Attempts to turn a BCP47 language tag into a locale ID via uloc_forLanguageTag().
 *
 * id            - the input tag; returned unchanged when the conversion is unusable.
 * buffer/length - caller-supplied output storage handed to uloc_forLanguageTag().
 * err           - in/out error code; an unterminated result
 *                 (U_STRING_NOT_TERMINATED_WARNING) is reported to the caller as
 *                 U_BUFFER_OVERFLOW_ERROR.
 * pLocaleIdSize - optional; when non-null receives the size returned by
 *                 uloc_forLanguageTag(), whether or not the conversion is usable.
 *
 * Returns buffer when the conversion produced a non-empty, terminated result
 * without failure; otherwise returns id.
 */
static const char* _ConvertBCP47(
        const char* id, char* buffer, int32_t length,
        UErrorCode* err, int32_t* pLocaleIdSize) {
    const int32_t convertedSize = uloc_forLanguageTag(id, buffer, length, nullptr, err);

    // Decide usability before touching *err so the checks see the raw status.
    const bool unterminated = (*err == U_STRING_NOT_TERMINATED_WARNING);
    const bool unusable = unterminated || convertedSize <= 0 || U_FAILURE(*err);

    if (unterminated) {
        // A result that filled the buffer without a terminator is treated as an overflow.
        *err = U_BUFFER_OVERFLOW_ERROR;
    }

    if (pLocaleIdSize != nullptr) {
        *pLocaleIdSize = convertedSize;
    }

    return unusable ? id : buffer;
}
/* Gets the size of the shortest subtag in the given localeID. */
static int32_t getShortestSubtagLength(const char *localeID) {
int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
Expand Down Expand Up @@ -762,7 +743,7 @@ ulocimp_getKeywordValue(const char* localeID,
char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

if(status && U_SUCCESS(*status) && localeID) {
char tempBuffer[ULOC_FULLNAME_CAPACITY];
CharString tempBuffer;
const char* tmpLocaleID;

if (keywordName == nullptr || keywordName[0] == 0) {
Expand All @@ -776,8 +757,9 @@ ulocimp_getKeywordValue(const char* localeID,
}

if (_hasBCP47Extension(localeID)) {
tmpLocaleID = _ConvertBCP47(localeID, tempBuffer,
sizeof(tempBuffer), status, nullptr);
CharStringByteSink sink(&tempBuffer);
ulocimp_forLanguageTag(localeID, -1, sink, nullptr, status);
tmpLocaleID = U_SUCCESS(*status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
} else {
tmpLocaleID=localeID;
}
Expand Down Expand Up @@ -1406,16 +1388,17 @@ U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char* localeID,
UErrorCode* status)
{
char tempBuffer[ULOC_FULLNAME_CAPACITY];
CharString tempBuffer;
const char* tmpLocaleID;

if(status==nullptr || U_FAILURE(*status)) {
return 0;
}

if (_hasBCP47Extension(localeID)) {
tmpLocaleID = _ConvertBCP47(localeID, tempBuffer,
sizeof(tempBuffer), status, nullptr);
CharStringByteSink sink(&tempBuffer);
ulocimp_forLanguageTag(localeID, -1, sink, nullptr, status);
tmpLocaleID = U_SUCCESS(*status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
} else {
if (localeID==nullptr) {
localeID=uloc_getDefault();
Expand Down Expand Up @@ -1489,7 +1472,7 @@ _canonicalize(const char* localeID,
}

int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
PreflightingLocaleIDBuffer tempBuffer; // if localeID has a BCP47 extension, tmpLocaleID points to this
CharString tempBuffer; // if localeID has a BCP47 extension, tmpLocaleID points to this
CharString localeIDWithHyphens; // if localeID has a BPC47 extension and have _, tmpLocaleID points to this
const char* origLocaleID;
const char* tmpLocaleID;
Expand All @@ -1512,13 +1495,9 @@ _canonicalize(const char* localeID,
}
}

do {
// After this call tmpLocaleID may point to localeIDPtr which may
// point to either localeID or localeIDWithHyphens.data().
tmpLocaleID = _ConvertBCP47(localeIDPtr, tempBuffer.getBuffer(),
tempBuffer.getCapacity(), err,
&(tempBuffer.requestedCapacity));
} while (tempBuffer.needToTryAgain(err));
CharStringByteSink tempSink(&tempBuffer);
ulocimp_forLanguageTag(localeIDPtr, -1, tempSink, nullptr, err);
tmpLocaleID = U_SUCCESS(*err) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeIDPtr;
} else {
if (localeID==nullptr) {
localeID=uloc_getDefault();
Expand Down Expand Up @@ -1795,7 +1774,7 @@ uloc_getVariant(const char* localeID,
int32_t variantCapacity,
UErrorCode* err)
{
char tempBuffer[ULOC_FULLNAME_CAPACITY];
CharString tempBuffer;
const char* tmpLocaleID;
int32_t i=0;

Expand All @@ -1804,7 +1783,9 @@ uloc_getVariant(const char* localeID,
}

if (_hasBCP47Extension(localeID)) {
tmpLocaleID =_ConvertBCP47(localeID, tempBuffer, sizeof(tempBuffer), err, nullptr);
CharStringByteSink sink(&tempBuffer);
ulocimp_forLanguageTag(localeID, -1, sink, nullptr, err);
tmpLocaleID = U_SUCCESS(*err) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
} else {
if (localeID==nullptr) {
localeID=uloc_getDefault();
Expand Down
68 changes: 0 additions & 68 deletions icu4c/source/common/ulocimp.h
Original file line number Diff line number Diff line change
Expand Up @@ -309,72 +309,4 @@ U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* le
// Return true if the value is already canonicalized.
U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);

/**
 * A utility class for handling locale IDs that may be longer than ULOC_FULLNAME_CAPACITY.
 * This encompasses all of the logic to allocate a temporary locale ID buffer on the stack,
 * and then, if it's not big enough, reallocate it on the heap and try again.
 *
 * You use it like this:
 *     UErrorCode err = U_ZERO_ERROR;
 *
 *     PreflightingLocaleIDBuffer tempBuffer;
 *     do {
 *         tempBuffer.requestedCapacity = uloc_doSomething(localeID, tempBuffer.getBuffer(), tempBuffer.getCapacity(), &err);
 *     } while (tempBuffer.needToTryAgain(&err));
 *     if (U_SUCCESS(err)) {
 *         uloc_doSomethingWithTheResult(tempBuffer.getBuffer());
 *     }
 *
 * The object owns its heap buffer (released in the destructor), so it is
 * neither copyable nor movable.
 */
class PreflightingLocaleIDBuffer {
private:
    char stackBuffer[ULOC_FULLNAME_CAPACITY];
    char* heapBuffer = nullptr;                  // owned; allocated at most once by needToTryAgain()
    int32_t capacity = ULOC_FULLNAME_CAPACITY;   // capacity of whichever buffer getBuffer() returns

public:
    // Set by the caller to the size the last attempt asked for; used to size the heap buffer.
    int32_t requestedCapacity = ULOC_FULLNAME_CAPACITY;

    // No heap allocation. Use only on the stack.
    static void* U_EXPORT2 operator new(size_t) noexcept = delete;
    static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
#if U_HAVE_PLACEMENT_NEW
    static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete;
#endif

    PreflightingLocaleIDBuffer() {}

    // Copying would leave two objects freeing the same heapBuffer in their
    // destructors, so copy (and with it, implicit move) is disabled.
    PreflightingLocaleIDBuffer(const PreflightingLocaleIDBuffer&) = delete;
    PreflightingLocaleIDBuffer& operator=(const PreflightingLocaleIDBuffer&) = delete;

    ~PreflightingLocaleIDBuffer() { uprv_free(heapBuffer); }

    /** Returns the heap buffer once one has been allocated, otherwise the stack buffer. */
    char* getBuffer() {
        if (heapBuffer == nullptr) {
            return stackBuffer;
        } else {
            return heapBuffer;
        }
    }

    /** Returns the capacity of the buffer currently returned by getBuffer(). */
    int32_t getCapacity() {
        return capacity;
    }

    /**
     * Decides whether the caller should repeat its operation with a bigger buffer.
     *
     * Returns false if a heap buffer was already allocated (only one retry is made)
     * or if *err is neither U_BUFFER_OVERFLOW_ERROR nor U_STRING_NOT_TERMINATED_WARNING.
     * Otherwise allocates requestedCapacity + 2 bytes; on success resets *err to
     * U_ZERO_ERROR, switches to the heap buffer and returns true; on allocation
     * failure sets *err to U_MEMORY_ALLOCATION_ERROR and returns false.
     */
    bool needToTryAgain(UErrorCode* err) {
        if (heapBuffer != nullptr) {
            return false;
        }

        if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING) {
            int32_t newCapacity = requestedCapacity + 2;    // one for the terminating null, one just for paranoia
            heapBuffer = static_cast<char*>(uprv_malloc(newCapacity));
            if (heapBuffer == nullptr) {
                *err = U_MEMORY_ALLOCATION_ERROR;
            } else {
                *err = U_ZERO_ERROR;
                capacity = newCapacity;
            }
            return U_SUCCESS(*err);
        }
        return false;
    }
};

#endif
15 changes: 8 additions & 7 deletions icu4c/source/i18n/collationruleparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -606,19 +606,20 @@ CollationRuleParser::parseSetting(UErrorCode &errorCode) {
lang.appendInvariantChars(v, errorCode);
if(errorCode == U_MEMORY_ALLOCATION_ERROR) { return; }
// BCP 47 language tag -> ICU locale ID
char localeID[ULOC_FULLNAME_CAPACITY];
CharString localeID;
int32_t parsedLength;
int32_t length = uloc_forLanguageTag(lang.data(), localeID, ULOC_FULLNAME_CAPACITY,
&parsedLength, &errorCode);
if(U_FAILURE(errorCode) ||
parsedLength != lang.length() || length >= ULOC_FULLNAME_CAPACITY) {
{
CharStringByteSink sink(&localeID);
ulocimp_forLanguageTag(lang.data(), -1, sink, &parsedLength, &errorCode);
}
if(U_FAILURE(errorCode) || parsedLength != lang.length()) {
errorCode = U_ZERO_ERROR;
setParseError("expected language tag in [import langTag]", errorCode);
return;
}
// localeID minus all keywords
char baseID[ULOC_FULLNAME_CAPACITY];
length = uloc_getBaseName(localeID, baseID, ULOC_FULLNAME_CAPACITY, &errorCode);
int32_t length = uloc_getBaseName(localeID.data(), baseID, ULOC_FULLNAME_CAPACITY, &errorCode);
if(U_FAILURE(errorCode) || length >= ULOC_KEYWORDS_CAPACITY) {
errorCode = U_ZERO_ERROR;
setParseError("expected language tag in [import langTag]", errorCode);
Expand All @@ -634,7 +635,7 @@ CollationRuleParser::parseSetting(UErrorCode &errorCode) {
CharString collationType;
{
CharStringByteSink sink(&collationType);
ulocimp_getKeywordValue(localeID, "collation", sink, &errorCode);
ulocimp_getKeywordValue(localeID.data(), "collation", sink, &errorCode);
}
if(U_FAILURE(errorCode)) {
errorCode = U_ZERO_ERROR;
Expand Down

0 comments on commit 96dcaf7

Please sign in to comment.