Skip to content

Commit

Permalink
ICU-21289 Switch to using CharString for calling uloc_getParent().
Browse files Browse the repository at this point in the history
  • Loading branch information
roubert committed Sep 26, 2023
1 parent 96dcaf7 commit 037449f
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 42 deletions.
40 changes: 26 additions & 14 deletions icu4c/source/common/ucurr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

#if !UCONFIG_NO_FORMATTING

#include <utility>

#include "unicode/ucurr.h"
#include "unicode/locid.h"
#include "unicode/ures.h"
Expand Down Expand Up @@ -602,11 +604,15 @@ ucurr_forLocale(const char* locale,

if ((U_FAILURE(localStatus)) && strchr(id, '_') != 0) {
// We don't know about it. Check to see if we support the variant.
uloc_getParent(locale, id, UPRV_LENGTHOF(id), ec);
CharString parent;
{
CharStringByteSink sink(&parent);
ulocimp_getParent(locale, sink, ec);
}
*ec = U_USING_FALLBACK_WARNING;
// TODO: Loop over the shortened id rather than recursing and
// TODO: Loop over the parent rather than recursing and
// looking again for a currency keyword.
return ucurr_forLocale(id, buff, buffCapacity, ec);
return ucurr_forLocale(parent.data(), buff, buffCapacity, ec);
}
if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) {
// There is nothing to fallback to. Report the failure/warning if possible.
Expand All @@ -629,20 +635,22 @@ ucurr_forLocale(const char* locale,
* @return true if the fallback happened; false if locale is already
* root ("").
*/
static UBool fallback(char *loc) {
if (!*loc) {
static UBool fallback(CharString& loc) {
if (loc.isEmpty()) {
return false;
}
UErrorCode status = U_ZERO_ERROR;
if (uprv_strcmp(loc, "en_GB") == 0) {
if (loc == "en_GB") {
// HACK: See #13368. We need "en_GB" to fall back to "en_001" instead of "en"
// in order to consume the correct data strings. This hack will be removed
// when proper data sink loading is implemented here.
// NOTE: "001" adds 1 char over "GB". However, both call sites allocate
// arrays with length ULOC_FULLNAME_CAPACITY (plenty of room for en_001).
uprv_strcpy(loc + 3, "001");
loc.truncate(3);
loc.append("001", status);
} else {
uloc_getParent(loc, loc, (int32_t)uprv_strlen(loc), &status);
CharString tmp;
CharStringByteSink sink(&tmp);
ulocimp_getParent(loc.data(), sink, &status);
loc = std::move(tmp);
}
/*
char *i = uprv_strrchr(loc, '_');
Expand Down Expand Up @@ -915,13 +923,17 @@ getCurrencyNameCount(const char* loc, int32_t* total_currency_name_count, int32_
*total_currency_name_count = 0;
*total_currency_symbol_count = 0;
const char16_t* s = nullptr;
char locale[ULOC_FULLNAME_CAPACITY] = "";
uprv_strcpy(locale, loc);
CharString locale;
{
UErrorCode status = U_ZERO_ERROR;
locale.append(loc, status);
if (U_FAILURE(status)) { return; }
}
const icu::Hashtable *currencySymbolsEquiv = getCurrSymbolsEquiv();
for (;;) {
UErrorCode ec2 = U_ZERO_ERROR;
// TODO: ures_openDirect?
UResourceBundle* rb = ures_open(U_ICUDATA_CURR, locale, &ec2);
UResourceBundle* rb = ures_open(U_ICUDATA_CURR, locale.data(), &ec2);
UResourceBundle* curr = ures_getByKey(rb, CURRENCIES, nullptr, &ec2);
int32_t n = ures_getSize(curr);
for (int32_t i=0; i<n; ++i) {
Expand Down Expand Up @@ -1122,7 +1134,7 @@ collectCurrencyNames(const char* locale,
ures_close(curr);
ures_close(rb);

if (!fallback(loc.data())) {
if (!fallback(loc)) {
break;
}
}
Expand Down
35 changes: 29 additions & 6 deletions icu4c/source/common/uloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1655,12 +1655,39 @@ uloc_getParent(const char* localeID,
char* parent,
int32_t parentCapacity,
UErrorCode* err)
{
if (U_FAILURE(*err)) {
return 0;
}

CheckedArrayByteSink sink(parent, parentCapacity);
ulocimp_getParent(localeID, sink, err);

int32_t reslen = sink.NumberOfBytesAppended();

if (U_FAILURE(*err)) {
return reslen;
}

if (sink.Overflowed()) {
*err = U_BUFFER_OVERFLOW_ERROR;
} else {
u_terminateChars(parent, parentCapacity, reslen, err);
}

return reslen;
}

U_CAPI void U_EXPORT2
ulocimp_getParent(const char* localeID,
icu::ByteSink& sink,
UErrorCode* err)
{
const char *lastUnderscore;
int32_t i;

if (U_FAILURE(*err))
return 0;
return;

if (localeID == nullptr)
localeID = uloc_getDefault();
Expand All @@ -1676,13 +1703,9 @@ uloc_getParent(const char* localeID,
if (uprv_strnicmp(localeID, "und_", 4) == 0) {
localeID += 3;
i -= 3;
uprv_memmove(parent, localeID, uprv_min(i, parentCapacity));
} else if (parent != localeID) {
uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
}
sink.Append(localeID, i);
}

return u_terminateChars(parent, parentCapacity, i, err);
}

U_CAPI int32_t U_EXPORT2
Expand Down
5 changes: 5 additions & 0 deletions icu4c/source/common/ulocimp.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ ulocimp_getKeywordValue(const char* localeID,
icu::ByteSink& sink,
UErrorCode* status);

/**
 * Appends the parent locale ID of localeID to sink. This is the
 * ByteSink-based counterpart of uloc_getParent(), which delegates to
 * this function and then handles overflow reporting and NUL termination
 * of its fixed-size output buffer itself.
 *
 * @param localeID the locale ID whose parent is wanted; if nullptr, the
 *                 default locale (uloc_getDefault()) is used instead.
 * @param sink     receives the bytes of the parent locale ID. Any NUL
 *                 termination is the caller's responsibility.
 * @param err      in/out error code; the function returns immediately
 *                 without writing to sink if it already indicates a
 *                 failure on entry.
 */
U_CAPI void U_EXPORT2
ulocimp_getParent(const char* localeID,
icu::ByteSink& sink,
UErrorCode* err);

/**
* Writes a well-formed language tag for this locale ID.
*
Expand Down
12 changes: 7 additions & 5 deletions icu4c/source/i18n/dayperiodrules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
#include "dayperiodrules.h"

#include "unicode/ures.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cstring.h"
#include "ucln_in.h"
#include "uhash.h"
#include "ulocimp.h"
#include "umutex.h"
#include "uresimp.h"

Expand Down Expand Up @@ -342,7 +344,6 @@ const DayPeriodRules *DayPeriodRules::getInstance(const Locale &locale, UErrorCo

const char *localeCode = locale.getBaseName();
char name[ULOC_FULLNAME_CAPACITY];
char parentName[ULOC_FULLNAME_CAPACITY];

if (uprv_strlen(localeCode) < ULOC_FULLNAME_CAPACITY) {
uprv_strcpy(name, localeCode);
Expand All @@ -360,13 +361,14 @@ const DayPeriodRules *DayPeriodRules::getInstance(const Locale &locale, UErrorCo
while (*name != '\0') {
ruleSetNum = uhash_geti(data->localeToRuleSetNumMap, name);
if (ruleSetNum == 0) {
// name and parentName can't be the same pointer, so fill in parent then copy to child.
uloc_getParent(name, parentName, ULOC_FULLNAME_CAPACITY, &errorCode);
if (*parentName == '\0') {
CharString parent;
CharStringByteSink sink(&parent);
ulocimp_getParent(name, sink, &errorCode);
if (parent.isEmpty()) {
// Saves a lookup in the hash table.
break;
}
uprv_strcpy(name, parentName);
parent.extract(name, UPRV_LENGTHOF(name), errorCode);
} else {
break;
}
Expand Down
19 changes: 15 additions & 4 deletions icu4c/source/i18n/gender.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,20 @@

#if !UCONFIG_NO_FORMATTING

#include <utility>

#include "unicode/gender.h"
#include "unicode/ugender.h"
#include "unicode/ures.h"

#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "mutex.h"
#include "uassert.h"
#include "ucln_in.h"
#include "ulocimp.h"
#include "umutex.h"
#include "uhash.h"

Expand Down Expand Up @@ -148,12 +153,18 @@ const GenderInfo* GenderInfo::loadInstance(const Locale& locale, UErrorCode& sta
const char16_t* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &key_status);
if (s == nullptr) {
key_status = U_ZERO_ERROR;
char parentLocaleName[ULOC_FULLNAME_CAPACITY];
uprv_strcpy(parentLocaleName, curLocaleName);
while (s == nullptr && uloc_getParent(parentLocaleName, parentLocaleName, ULOC_FULLNAME_CAPACITY, &key_status) > 0) {
CharString parentLocaleName(curLocaleName, key_status);
while (s == nullptr) {
{
CharString tmp;
CharStringByteSink sink(&tmp);
ulocimp_getParent(parentLocaleName.data(), sink, &status);
if (tmp.isEmpty()) break;
parentLocaleName = std::move(tmp);
}
key_status = U_ZERO_ERROR;
resLen = 0;
s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &key_status);
s = ures_getStringByKey(locRes.getAlias(), parentLocaleName.data(), &resLen, &key_status);
key_status = U_ZERO_ERROR;
}
}
Expand Down
19 changes: 14 additions & 5 deletions icu4c/source/i18n/plurrule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#include <math.h>
#include <stdio.h>

#include <utility>

#include "unicode/utypes.h"
#include "unicode/localpointer.h"
#include "unicode/plurrule.h"
Expand All @@ -20,6 +22,7 @@
#include "unicode/numfmt.h"
#include "unicode/decimfmt.h"
#include "unicode/numberrangeformatter.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
Expand All @@ -40,6 +43,7 @@
#include "util.h"
#include "pluralranges.h"
#include "numrange_impl.h"
#include "ulocimp.h"

#if !UCONFIG_NO_FORMATTING

Expand Down Expand Up @@ -827,14 +831,19 @@ PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorC
if (s == nullptr) {
// Check parent locales.
UErrorCode status = U_ZERO_ERROR;
char parentLocaleName[ULOC_FULLNAME_CAPACITY];
const char *curLocaleName2=locale.getBaseName();
uprv_strcpy(parentLocaleName, curLocaleName2);
CharString parentLocaleName(curLocaleName2, status);

while (uloc_getParent(parentLocaleName, parentLocaleName,
ULOC_FULLNAME_CAPACITY, &status) > 0) {
for (;;) {
{
CharString tmp;
CharStringByteSink sink(&tmp);
ulocimp_getParent(parentLocaleName.data(), sink, &status);
if (tmp.isEmpty()) break;
parentLocaleName = std::move(tmp);
}
resLen=0;
s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status);
s = ures_getStringByKey(locRes.getAlias(), parentLocaleName.data(), &resLen, &status);
if (s != nullptr) {
errCode = U_ZERO_ERROR;
break;
Expand Down
23 changes: 15 additions & 8 deletions icu4c/source/i18n/tmutfmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,18 @@

#if !UCONFIG_NO_FORMATTING

#include <utility>

#include "unicode/decimfmt.h"
#include "unicode/localpointer.h"
#include "plurrule_impl.h"
#include "uvector.h"
#include "bytesinkutil.h"
#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "hash.h"
#include "ulocimp.h"
#include "uresimp.h"
#include "ureslocs.h"
#include "unicode/msgfmt.h"
Expand Down Expand Up @@ -556,14 +560,17 @@ TimeUnitFormat::searchInLocaleChain(UTimeUnitFormatStyle style, const char* key,
return;
}
UErrorCode status = U_ZERO_ERROR;
char parentLocale[ULOC_FULLNAME_CAPACITY];
uprv_strcpy(parentLocale, localeName);
int32_t locNameLen;
CharString parentLocale(localeName, status);
U_ASSERT(countToPatterns != nullptr);
while ((locNameLen = uloc_getParent(parentLocale, parentLocale,
ULOC_FULLNAME_CAPACITY, &status)) >= 0){
for (;;) {
{
CharString tmp;
CharStringByteSink sink(&tmp);
ulocimp_getParent(parentLocale.data(), sink, &status);
parentLocale = std::move(tmp);
}
// look for pattern for srcPluralCount in locale tree
LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_UNIT, parentLocale, &status));
LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_UNIT, parentLocale.data(), &status));
LocalUResourceBundlePointer unitsRes(ures_getByKey(rb.getAlias(), key, nullptr, &status));
const char* timeUnitName = getTimeUnitName(srcTimeUnitField, status);
LocalUResourceBundlePointer countsToPatternRB(ures_getByKey(unitsRes.getAlias(), timeUnitName, nullptr, &status));
Expand Down Expand Up @@ -594,14 +601,14 @@ TimeUnitFormat::searchInLocaleChain(UTimeUnitFormatStyle style, const char* key,
return;
}
status = U_ZERO_ERROR;
if (locNameLen == 0) {
if (parentLocale.isEmpty()) {
break;
}
}

// if no unitsShort resource was found even after fallback to root locale
// then search the units resource fallback from the current level to root
if ( locNameLen == 0 && uprv_strcmp(key, gShortUnitsTag) == 0) {
if ( parentLocale.isEmpty() && uprv_strcmp(key, gShortUnitsTag) == 0) {
#ifdef TMUTFMT_DEBUG
std::cout << "loop into searchInLocaleChain since Short-Long-Alternative \n";
#endif
Expand Down

0 comments on commit 037449f

Please sign in to comment.