diff --git a/icu4c/source/common/locid.cpp b/icu4c/source/common/locid.cpp index feadbcbccccb..7b259da88ce9 100644 --- a/icu4c/source/common/locid.cpp +++ b/icu4c/source/common/locid.cpp @@ -627,6 +627,17 @@ class AliasDataBuilder { LocalMemory& types, LocalMemory& replacementIndexes, int32_t &length, UErrorCode &status); + + // Read the subdivisionAlias data from alias to + // strings+types+replacementIndexes + // Allocate length items for types, to store the type field. + // Allocate length items for replacementIndexes, + // to store the index in the strings for the replacement subdivision. + void readSubdivisionAlias(UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory& types, + LocalMemory& replacementIndexes, + int32_t &length, UErrorCode &status); }; /** @@ -647,6 +658,7 @@ class AliasData : public UMemory { const CharStringMap& scriptMap() const { return script; } const CharStringMap& territoryMap() const { return territory; } const CharStringMap& variantMap() const { return variant; } + const CharStringMap& subdivisionMap() const { return subdivision; } static void U_CALLCONV loadData(UErrorCode &status); static UBool U_CALLCONV cleanup(); @@ -658,11 +670,13 @@ class AliasData : public UMemory { CharStringMap scriptMap, CharStringMap territoryMap, CharStringMap variantMap, + CharStringMap subdivisionMap, CharString* strings) : language(std::move(languageMap)), script(std::move(scriptMap)), territory(std::move(territoryMap)), variant(std::move(variantMap)), + subdivision(std::move(subdivisionMap)), strings(strings) { } @@ -676,6 +690,7 @@ class AliasData : public UMemory { CharStringMap script; CharStringMap territory; CharStringMap variant; + CharStringMap subdivision; CharString* strings; friend class AliasDataBuilder; @@ -866,6 +881,34 @@ AliasDataBuilder::readVariantAlias( status); } +/** + * Read the subdivisionAlias data from alias to strings+types+replacementIndexes. + * Allocate length items for types, to store the type field. 
Allocate length + * items for replacementIndexes, to store the index in the strings for the + * replacement subdivisions. + */ +void +AliasDataBuilder::readSubdivisionAlias( + UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory& types, + LocalMemory& replacementIndexes, + int32_t &length, + UErrorCode &status) +{ + return readAlias( + alias, strings, types, replacementIndexes, length, +#if U_DEBUG + [](const char* type) { + U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8); + }, +#else + [](const char*) {}, +#endif + [](const UnicodeString&) { }, + status); +} + /** * Initializes the alias data from the ICU resource bundles. The alias data * contains alias of language, country, script and variants. @@ -905,12 +948,14 @@ AliasDataBuilder::build(UErrorCode &status) { ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status)); LocalUResourceBundlePointer variantAlias( ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status)); + LocalUResourceBundlePointer subdivisionAlias( + ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status)); if (U_FAILURE(status)) { return nullptr; } int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0, - variantLength = 0; + variantLength = 0, subdivisionLength = 0; // Read the languageAlias into languageTypes, languageReplacementIndexes // and strings @@ -955,6 +1000,16 @@ AliasDataBuilder::build(UErrorCode &status) { variantReplacementIndexes, variantLength, status); + // Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes + // and strings + LocalMemory subdivisionTypes; + LocalMemory subdivisionReplacementIndexes; + readSubdivisionAlias(subdivisionAlias.getAlias(), + &strings, + subdivisionTypes, + subdivisionReplacementIndexes, + subdivisionLength, status); + if (U_FAILURE(status)) { return nullptr; } @@ -994,6 +1049,14 @@ AliasDataBuilder::build(UErrorCode &status) { status); } + // Build the subdivisionMap from subdivisionTypes & 
subdivisionReplacementIndexes. + CharStringMap subdivisionMap(2, status); + for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) { + subdivisionMap.put(subdivisionTypes[i], + strings.get(subdivisionReplacementIndexes[i]), + status); + } + if (U_FAILURE(status)) { return nullptr; } @@ -1004,6 +1067,7 @@ AliasDataBuilder::build(UErrorCode &status) { std::move(scriptMap), std::move(territoryMap), std::move(variantMap), + std::move(subdivisionMap), strings.orphanCharStrings()); if (data == nullptr) { @@ -1105,6 +1169,9 @@ class AliasReplacer { // Replace by using variantAlias. bool replaceVariant(UErrorCode& status); + + // Replace by using subdivisionAlias. + bool replaceSubdivision(CharString& subdivision, UErrorCode& status); }; CharString& @@ -1433,6 +1500,27 @@ AliasReplacer::replaceVariant(UErrorCode& status) return false; } +bool +AliasReplacer::replaceSubdivision(CharString& subdivision, UErrorCode& status) +{ + if (U_FAILURE(status)) { + return false; + } + const char *replacement = data->subdivisionMap().get(subdivision.data()); + if (replacement != nullptr) { + const char* firstSpace = uprv_strchr(replacement, ' '); + // Found replacement data for this subdivision. + size_t len = (firstSpace != nullptr) ? 
+ (firstSpace - replacement) : uprv_strlen(replacement); + // Replace only when the result is a 3-8 char subdivision id; len == 2 is ignored, see CLDR-14312 + if (3 <= len && len <= 8) { + subdivision.clear().append(replacement, (int32_t)len, status); + return true; // report a change only when the value was actually rewritten + } + } + return false; +} + CharString& AliasReplacer::outputToString( CharString& out, UErrorCode status) @@ -1495,7 +1583,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status) region = nullptr; } const char* variantsStr = locale.getVariant(); - const char* extensionsStr = locale_getKeywordsStart(locale.getName()); CharString variantsBuff(variantsStr, -1, status); if (!variantsBuff.isEmpty()) { if (U_FAILURE(status)) { return false; } @@ -1559,11 +1646,42 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status) if (U_FAILURE(status)) { return false; } // Nothing changed and we know the order of the vaiants are not change // because we have no variant or only one. - if (changed == 0 && variants.size() <= 1) { + const char* extensionsStr = locale_getKeywordsStart(locale.getName()); + if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) { return false; } outputToString(out, status); + if (U_FAILURE(status)) { + return false; + } if (extensionsStr != nullptr) { + changed = 0; + Locale temp(locale); + LocalPointer iter(locale.createKeywords(status)); + if (U_SUCCESS(status) && !iter.isNull()) { + const char* key; + while ((key = iter->next(nullptr, status)) != nullptr) { + if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0) { + CharString value; + CharStringByteSink valueSink(&value); + locale.getKeywordValue(key, valueSink, status); + if (U_FAILURE(status)) { + status = U_ZERO_ERROR; + continue; + } + if (replaceSubdivision(value, status)) { + changed++; + } + temp.setKeywordValue(key, value.data(), status); + if (U_FAILURE(status)) { + return false; + } + } + } + } + if (changed != 0) { + extensionsStr = locale_getKeywordsStart(temp.getName()); + } out.append(extensionsStr, 
status); } if (U_FAILURE(status)) { @@ -1572,7 +1690,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status) // If the tag is not changed, return. if (uprv_strcmp(out.data(), locale.getName()) == 0) { U_ASSERT(changed == 0); - U_ASSERT(variants.size() > 1); out.clear(); return false; } diff --git a/icu4c/source/test/intltest/loctest.cpp b/icu4c/source/test/intltest/loctest.cpp index d12336b74d76..4bb402c84e51 100644 --- a/icu4c/source/test/intltest/loctest.cpp +++ b/icu4c/source/test/intltest/loctest.cpp @@ -4916,8 +4916,24 @@ void LocaleTest::TestCanonicalize(void) // ICU-21344 { "ku-Arab-NT", "ku-Arab-IQ"}, + // ICU-21402 + { "und-u-rg-no23", "und-u-rg-no50"}, + { "und-u-rg-cn11", "und-u-rg-cnbj"}, + { "und-u-rg-cz10a", "und-u-rg-cz110"}, + { "und-u-rg-fra", "und-u-rg-frges"}, + { "und-u-rg-frg", "und-u-rg-frges"}, + { "und-u-rg-lud", "und-u-rg-lucl"}, + + { "und-NO-u-sd-no23", "und-NO-u-sd-no50"}, + { "und-CN-u-sd-cn11", "und-CN-u-sd-cnbj"}, + { "und-CZ-u-sd-cz10a", "und-CZ-u-sd-cz110"}, + { "und-FR-u-sd-fra", "und-FR-u-sd-frges"}, + { "und-FR-u-sd-frg", "und-FR-u-sd-frges"}, + { "und-LU-u-sd-lud", "und-LU-u-sd-lucl"}, + // ICU-21401 { "cel-gaulish", "xtg"}, + }; int32_t i; for (i=0; i < UPRV_LENGTHOF(testCases); i++) { diff --git a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java index 9faa3c4444a3..3a19c2942b16 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java @@ -1268,12 +1268,33 @@ public String replace() { // Nothing changed in this iteration, break out the loop break; } // while(1) - if (changed) { - String result = lscvToID(language, script, region, + if (extensions == null && !changed) { + return null; + } + String result = lscvToID(language, script, region, ((variants == null) ? 
"" : Utility.joinStrings("_", variants))); - if (extensions != null) { - result += extensions; + if (extensions != null) { + boolean keywordChanged = false; + ULocale temp = new ULocale(result + extensions); + Iterator keywords = temp.getKeywords(); + while (keywords != null && keywords.hasNext()) { + String key = keywords.next(); + if (key.equals("rg") || key.equals("sd")) { + String value = temp.getKeywordValue(key); + String replacement = replaceSubdivision(value); + if (replacement != null) { + temp = temp.setKeywordValue(key, replacement); + keywordChanged = true; + } + } } + if (keywordChanged) { + extensions = temp.getName().substring(temp.getBaseName().length()); + changed = true; + } + result += extensions; + } + if (changed) { return result; } // Nothing changed in any iteration of the loop. @@ -1285,6 +1306,7 @@ public String replace() { private static Map scriptAliasMap = null; private static Map> territoryAliasMap = null; private static Map variantAliasMap = null; + private static Map subdivisionAliasMap = null; /* * Initializes the alias data from the ICU resource bundles. 
The alias @@ -1302,6 +1324,7 @@ private static synchronized void loadAliasData() { scriptAliasMap = new HashMap<>(); territoryAliasMap = new HashMap<>(); variantAliasMap = new HashMap<>(); + subdivisionAliasMap = new HashMap<>(); UResourceBundle metadata = UResourceBundle.getBundleInstance( ICUData.ICU_BASE_NAME, "metadata", @@ -1311,6 +1334,7 @@ private static synchronized void loadAliasData() { UResourceBundle scriptAlias = metadataAlias.get("script"); UResourceBundle territoryAlias = metadataAlias.get("territory"); UResourceBundle variantAlias = metadataAlias.get("variant"); + UResourceBundle subdivisionAlias = metadataAlias.get("subdivision"); for (int i = 0 ; i < languageAlias.getSize(); i++) { UResourceBundle res = languageAlias.get(i); @@ -1369,6 +1393,22 @@ private static synchronized void loadAliasData() { } variantAliasMap.put(aliasFrom, aliasTo); } + for (int i = 0 ; i < subdivisionAlias.getSize(); i++) { + UResourceBundle res = subdivisionAlias.get(i); + String aliasFrom = res.getKey(); + String aliasTo = res.get("replacement").getString().split(" ")[0]; + if (aliasFrom.length() < 3 || aliasFrom.length() > 8) { + throw new IllegalArgumentException( + "Incorrect key [" + aliasFrom + "] in alias:subdivision."); + } + if (aliasTo.length() < 3 || aliasTo.length() > 8) { + // Ignore replacement < 3 for now. 
see CLDR-14312 + // throw new IllegalArgumentException( + // "Incorrect value [" + aliasTo + "] in alias:subdivision."); + continue; + } + subdivisionAliasMap.put(aliasFrom, aliasTo); + } aliasDataIsLoaded = true; } @@ -1591,6 +1631,11 @@ private boolean replaceVariant() { } return false; } + + private String replaceSubdivision(String subdivision) { + return subdivisionAliasMap.get(subdivision); + } + }; /** diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java index 2868ddd8ccef..06eef5f246b8 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/ULocaleTest.java @@ -5216,6 +5216,21 @@ public void TestCanonical() { // ICU-21344 Assert.assertEquals("ku-Arab-IQ", canonicalTag("ku-Arab-NT")); + // ICU-21402 + Assert.assertEquals("und-u-rg-no50", canonicalTag("und-u-rg-no23")); + Assert.assertEquals("und-u-rg-cnbj", canonicalTag("und-u-rg-cn11")); + Assert.assertEquals("und-u-rg-cz110", canonicalTag("und-u-rg-cz10a")); + Assert.assertEquals("und-u-rg-frges", canonicalTag("und-u-rg-fra")); + Assert.assertEquals("und-u-rg-frges", canonicalTag("und-u-rg-frg")); + Assert.assertEquals("und-u-rg-lucl", canonicalTag("und-u-rg-lud")); + + Assert.assertEquals("und-NO-u-sd-no50", canonicalTag("und-NO-u-sd-no23")); + Assert.assertEquals("und-CN-u-sd-cnbj", canonicalTag("und-CN-u-sd-cn11")); + Assert.assertEquals("und-CZ-u-sd-cz110", canonicalTag("und-CZ-u-sd-cz10a")); + Assert.assertEquals("und-FR-u-sd-frges", canonicalTag("und-FR-u-sd-fra")); + Assert.assertEquals("und-FR-u-sd-frges", canonicalTag("und-FR-u-sd-frg")); + Assert.assertEquals("und-LU-u-sd-lucl", canonicalTag("und-LU-u-sd-lud")); + // ICU-21401 Assert.assertEquals("xtg", canonicalTag("cel-gaulish")); }