Skip to content

Commit

Permalink
ICU-21402 replace sd and rg by subdivisionAlias
Browse files Browse the repository at this point in the history
See #1475
  • Loading branch information
FrankYFTang authored and Squash Bot committed Nov 19, 2020
1 parent 50bd796 commit 108f2c8
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 8 deletions.
123 changes: 119 additions & 4 deletions icu4c/source/common/locid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,17 @@ class AliasDataBuilder {
LocalMemory<const char*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length, UErrorCode &status);

// Read the subdivisionAlias data from alias to
// strings+types+replacementIndexes
// Allocate length items for types, to store the type field.
// Allocate length items for replacementIndexes,
// to store the index in the strings for the replacement subdivision.
void readSubdivisionAlias(UResourceBundle* alias,
UniqueCharStrings* strings,
LocalMemory<const char*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length, UErrorCode &status);
};

/**
Expand All @@ -647,6 +658,7 @@ class AliasData : public UMemory {
const CharStringMap& scriptMap() const { return script; }
const CharStringMap& territoryMap() const { return territory; }
const CharStringMap& variantMap() const { return variant; }
const CharStringMap& subdivisionMap() const { return subdivision; }

static void U_CALLCONV loadData(UErrorCode &status);
static UBool U_CALLCONV cleanup();
Expand All @@ -658,11 +670,13 @@ class AliasData : public UMemory {
CharStringMap scriptMap,
CharStringMap territoryMap,
CharStringMap variantMap,
CharStringMap subdivisionMap,
CharString* strings)
: language(std::move(languageMap)),
script(std::move(scriptMap)),
territory(std::move(territoryMap)),
variant(std::move(variantMap)),
subdivision(std::move(subdivisionMap)),
strings(strings) {
}

Expand All @@ -676,6 +690,7 @@ class AliasData : public UMemory {
CharStringMap script;
CharStringMap territory;
CharStringMap variant;
CharStringMap subdivision;
CharString* strings;

friend class AliasDataBuilder;
Expand Down Expand Up @@ -866,6 +881,34 @@ AliasDataBuilder::readVariantAlias(
status);
}

/**
 * Loads the subdivisionAlias entries found in `alias` into
 * strings + types + replacementIndexes by delegating to readAlias().
 *
 * `types` receives `length` items holding the type (alias key) field, and
 * `replacementIndexes` receives `length` items holding the index in
 * `strings` of the replacement subdivision.
 *
 * @param alias               resource bundle holding the subdivision aliases.
 * @param strings             string pool that stores the replacement text.
 * @param types               output array of alias keys.
 * @param replacementIndexes  output array of indexes into `strings`.
 * @param length              output number of entries read.
 * @param status              ICU error code, passed through to readAlias().
 */
void
AliasDataBuilder::readSubdivisionAlias(
    UResourceBundle* alias,
    UniqueCharStrings* strings,
    LocalMemory<const char*>& types,
    LocalMemory<int32_t>& replacementIndexes,
    int32_t &length,
    UErrorCode &status)
{
#if U_DEBUG
    // Debug builds assert that every alias key has a length of 3 to 8.
    auto checkType = [](const char* type) {
        U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8);
    };
#else
    // Without U_DEBUG the key is not inspected at all.
    auto checkType = [](const char*) {};
#endif
    // The replacement string is accepted as-is; nothing is validated here.
    auto checkReplacement = [](const UnicodeString&) { };

    readAlias(alias, strings, types, replacementIndexes, length,
              checkType, checkReplacement, status);
}

/**
* Initializes the alias data from the ICU resource bundles. The alias data
* contains aliases of language, country, script, variants and subdivisions.
Expand Down Expand Up @@ -905,12 +948,14 @@ AliasDataBuilder::build(UErrorCode &status) {
ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
LocalUResourceBundlePointer variantAlias(
ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
LocalUResourceBundlePointer subdivisionAlias(
ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status));

if (U_FAILURE(status)) {
return nullptr;
}
int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
variantLength = 0;
variantLength = 0, subdivisionLength = 0;

// Read the languageAlias into languageTypes, languageReplacementIndexes
// and strings
Expand Down Expand Up @@ -955,6 +1000,16 @@ AliasDataBuilder::build(UErrorCode &status) {
variantReplacementIndexes,
variantLength, status);

// Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
// and strings
LocalMemory<const char*> subdivisionTypes;
LocalMemory<int32_t> subdivisionReplacementIndexes;
readSubdivisionAlias(subdivisionAlias.getAlias(),
&strings,
subdivisionTypes,
subdivisionReplacementIndexes,
subdivisionLength, status);

if (U_FAILURE(status)) {
return nullptr;
}
Expand Down Expand Up @@ -994,6 +1049,14 @@ AliasDataBuilder::build(UErrorCode &status) {
status);
}

// Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
CharStringMap subdivisionMap(2, status);
for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
subdivisionMap.put(subdivisionTypes[i],
strings.get(subdivisionReplacementIndexes[i]),
status);
}

if (U_FAILURE(status)) {
return nullptr;
}
Expand All @@ -1004,6 +1067,7 @@ AliasDataBuilder::build(UErrorCode &status) {
std::move(scriptMap),
std::move(territoryMap),
std::move(variantMap),
std::move(subdivisionMap),
strings.orphanCharStrings());

if (data == nullptr) {
Expand Down Expand Up @@ -1105,6 +1169,9 @@ class AliasReplacer {

// Replace by using variantAlias.
bool replaceVariant(UErrorCode& status);

// Replace by using subdivisionAlias.
bool replaceSubdivision(CharString& subdivision, UErrorCode& status);
};

CharString&
Expand Down Expand Up @@ -1433,6 +1500,27 @@ AliasReplacer::replaceVariant(UErrorCode& status)
return false;
}

/**
 * Replace `subdivision` in place by using subdivisionAlias.
 *
 * Looks `subdivision` up in the subdivision alias map. When an entry exists,
 * only the text before the first space of the replacement is considered.
 * That text is used only if its length is between 3 and 8; a shorter
 * (length 2) or longer replacement is ignored, see CLDR-14312.
 *
 * @param subdivision  in/out: the subdivision value to canonicalize. It is
 *                     overwritten only when a usable replacement is found.
 * @param status       ICU error code; nothing is done if it already
 *                     indicates a failure on entry.
 * @return true only if `subdivision` was actually rewritten. An alias entry
 *         whose replacement is ignored leaves `subdivision` untouched and
 *         therefore reports false, so callers do not count it as a change.
 */
bool
AliasReplacer::replaceSubdivision(CharString& subdivision, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return false;
    }
    const char *replacement = data->subdivisionMap().get(subdivision.data());
    if (replacement == nullptr) {
        // No alias data for this subdivision.
        return false;
    }
    // Found replacement data for this subdivision. Use only the part that
    // precedes the first space, if there is one.
    const char* firstSpace = uprv_strchr(replacement, ' ');
    const size_t len = (firstSpace != nullptr) ?
        static_cast<size_t>(firstSpace - replacement) : uprv_strlen(replacement);
    // Ignore len == 2, see CLDR-14312
    if (len < 3 || len > 8) {
        // Nothing is replaced, so do not report a change.
        return false;
    }
    subdivision.clear().append(replacement, static_cast<int32_t>(len), status);
    return U_SUCCESS(status);
}

CharString&
AliasReplacer::outputToString(
CharString& out, UErrorCode status)
Expand Down Expand Up @@ -1495,7 +1583,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
region = nullptr;
}
const char* variantsStr = locale.getVariant();
const char* extensionsStr = locale_getKeywordsStart(locale.getName());
CharString variantsBuff(variantsStr, -1, status);
if (!variantsBuff.isEmpty()) {
if (U_FAILURE(status)) { return false; }
Expand Down Expand Up @@ -1559,11 +1646,40 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
if (U_FAILURE(status)) { return false; }
// Nothing changed, and we know the order of the variants is unchanged
// because we have no variant or only one.
if (changed == 0 && variants.size() <= 1) {
const char* extensionsStr = locale_getKeywordsStart(locale.getName());
if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
return false;
}
outputToString(out, status);
if (U_FAILURE(status)) {
return false;
}
if (extensionsStr != nullptr) {
changed = 0;
Locale temp(locale);
LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
if (U_SUCCESS(status) && !iter.isNull()) {
const char* key;
while ((key = iter->next(nullptr, status)) != nullptr) {
status = U_ZERO_ERROR;
CharString value;
CharStringByteSink valueSink(&value);
locale.getKeywordValue(key, valueSink, status);
if (U_FAILURE(status)) {
status = U_ZERO_ERROR;
continue;
}
if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0) {
if (replaceSubdivision(value, status)) {
changed++;
}
temp.setKeywordValue(key, value.data(), status);
}
}
}
if (changed != 0) {
extensionsStr = locale_getKeywordsStart(temp.getName());
}
out.append(extensionsStr, status);
}
if (U_FAILURE(status)) {
Expand All @@ -1572,7 +1688,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
// If the tag is not changed, return.
if (uprv_strcmp(out.data(), locale.getName()) == 0) {
U_ASSERT(changed == 0);
U_ASSERT(variants.size() > 1);
out.clear();
return false;
}
Expand Down
15 changes: 15 additions & 0 deletions icu4c/source/test/intltest/loctest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4915,6 +4915,21 @@ void LocaleTest::TestCanonicalize(void)

// ICU-21344
{ "ku-Arab-NT", "ku-Arab-IQ"},

// ICU-21402
{ "und-u-rg-no23", "und-u-rg-no50"},
{ "und-u-rg-cn11", "und-u-rg-cnbj"},
{ "und-u-rg-cz10a", "und-u-rg-cz110"},
{ "und-u-rg-fra", "und-u-rg-frges"},
{ "und-u-rg-frg", "und-u-rg-frges"},
{ "und-u-rg-lud", "und-u-rg-lucl"},

{ "und-NO-u-sd-no23", "und-NO-u-sd-no50"},
{ "und-CN-u-sd-cn11", "und-CN-u-sd-cnbj"},
{ "und-CZ-u-sd-cz10a", "und-CZ-u-sd-cz110"},
{ "und-FR-u-sd-fra", "und-FR-u-sd-frges"},
{ "und-FR-u-sd-frg", "und-FR-u-sd-frges"},
{ "und-LU-u-sd-lud", "und-LU-u-sd-lucl"},
};
int32_t i;
for (i=0; i < UPRV_LENGTHOF(testCases); i++) {
Expand Down
51 changes: 47 additions & 4 deletions icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
Original file line number Diff line number Diff line change
Expand Up @@ -1268,12 +1268,31 @@ public String replace() {
// Nothing changed in this iteration, break out the loop
break;
} // while(1)
if (changed) {
String result = lscvToID(language, script, region,
if (extensions == null && !changed) {
return null;
}
String result = lscvToID(language, script, region,
((variants == null) ? "" : Utility.joinStrings("_", variants)));
if (extensions != null) {
result += extensions;
if (extensions != null) {
boolean keywordChanged = false;
ULocale temp = new ULocale(result + extensions);
Iterator<String> keywords = temp.getKeywords();
while (keywords != null && keywords.hasNext()) {
String key = keywords.next();
String value = temp.getKeywordValue(key);
String replacement = replaceSubdivision(value);
if (replacement != null) {
temp = temp.setKeywordValue(key, replacement);
keywordChanged = true;
}
}
if (keywordChanged) {
extensions = temp.getName().substring(temp.getBaseName().length());
changed = true;
}
result += extensions;
}
if (changed) {
return result;
}
// Nothing changed in any iteration of the loop.
Expand All @@ -1285,6 +1304,7 @@ public String replace() {
private static Map<String, String> scriptAliasMap = null;
private static Map<String, List<String>> territoryAliasMap = null;
private static Map<String, String> variantAliasMap = null;
private static Map<String, String> subdivisionAliasMap = null;

/*
* Initializes the alias data from the ICU resource bundles. The alias
Expand All @@ -1302,6 +1322,7 @@ private static synchronized void loadAliasData() {
scriptAliasMap = new HashMap<>();
territoryAliasMap = new HashMap<>();
variantAliasMap = new HashMap<>();
subdivisionAliasMap = new HashMap<>();

UResourceBundle metadata = UResourceBundle.getBundleInstance(
ICUData.ICU_BASE_NAME, "metadata",
Expand All @@ -1311,6 +1332,7 @@ private static synchronized void loadAliasData() {
UResourceBundle scriptAlias = metadataAlias.get("script");
UResourceBundle territoryAlias = metadataAlias.get("territory");
UResourceBundle variantAlias = metadataAlias.get("variant");
UResourceBundle subdivisionAlias = metadataAlias.get("subdivision");

for (int i = 0 ; i < languageAlias.getSize(); i++) {
UResourceBundle res = languageAlias.get(i);
Expand Down Expand Up @@ -1369,6 +1391,22 @@ private static synchronized void loadAliasData() {
}
variantAliasMap.put(aliasFrom, aliasTo);
}
// Read the subdivision aliases into subdivisionAliasMap. Only the text
// before the first space of the "replacement" value is kept.
for (int i = 0 ; i < subdivisionAlias.getSize(); i++) {
    UResourceBundle res = subdivisionAlias.get(i);
    String aliasFrom = res.getKey();
    String aliasTo = res.get("replacement").getString().split(" ")[0];
    // The alias key must have a length of 3 to 8.
    if (aliasFrom.length() < 3 || aliasFrom.length() > 8) {
        throw new IllegalArgumentException(
            "Incorrect key [" + aliasFrom + "] in alias:subdivision.");
    }
    // A replacement whose length is outside 3..8 is skipped rather than
    // rejected, so no mapping is recorded for that key.
    if (aliasTo.length() < 3 || aliasTo.length() > 8) {
        // Ignore replacement < 3 for now. see CLDR-14312
        // throw new IllegalArgumentException(
        //     "Incorrect value [" + aliasTo + "] in alias:subdivision.");
        continue;
    }
    subdivisionAliasMap.put(aliasFrom, aliasTo);
}

aliasDataIsLoaded = true;
}
Expand Down Expand Up @@ -1591,6 +1629,11 @@ private boolean replaceVariant() {
}
return false;
}

/**
 * Looks up the replacement for a subdivision in subdivisionAliasMap.
 *
 * @param subdivision the subdivision value to look up.
 * @return the mapped replacement, or null when the map holds no entry
 *         for the given subdivision.
 */
private String replaceSubdivision(String subdivision) {
    final String mapped = subdivisionAliasMap.get(subdivision);
    return mapped;
}

};

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5215,6 +5215,21 @@ public void TestCanonical() {

// ICU-21344
Assert.assertEquals("ku-Arab-IQ", canonicalTag("ku-Arab-NT"));

// ICU-21402
Assert.assertEquals("und-u-rg-no50", canonicalTag("und-u-rg-no23"));
Assert.assertEquals("und-u-rg-cnbj", canonicalTag("und-u-rg-cn11"));
Assert.assertEquals("und-u-rg-cz110", canonicalTag("und-u-rg-cz10a"));
Assert.assertEquals("und-u-rg-frges", canonicalTag("und-u-rg-fra"));
Assert.assertEquals("und-u-rg-frges", canonicalTag("und-u-rg-frg"));
Assert.assertEquals("und-u-rg-lucl", canonicalTag("und-u-rg-lud"));

Assert.assertEquals("und-NO-u-sd-no50", canonicalTag("und-NO-u-sd-no23"));
Assert.assertEquals("und-CN-u-sd-cnbj", canonicalTag("und-CN-u-sd-cn11"));
Assert.assertEquals("und-CZ-u-sd-cz110", canonicalTag("und-CZ-u-sd-cz10a"));
Assert.assertEquals("und-FR-u-sd-frges", canonicalTag("und-FR-u-sd-fra"));
Assert.assertEquals("und-FR-u-sd-frges", canonicalTag("und-FR-u-sd-frg"));
Assert.assertEquals("und-LU-u-sd-lucl", canonicalTag("und-LU-u-sd-lud"));
}

@Test
Expand Down

0 comments on commit 108f2c8

Please sign in to comment.