Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ICU-21402 replace sd and rg by subdivisionAlias #1475

Merged
merged 1 commit into from
Nov 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 121 additions & 4 deletions icu4c/source/common/locid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,17 @@ class AliasDataBuilder {
LocalMemory<const char*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length, UErrorCode &status);

// Read the subdivisionAlias data from alias to
// strings+types+replacementIndexes
// Allocate length items for types, to store the type field.
// Allocate length items for replacementIndexes,
// to store the index in the strings for the replacement subdivision.
void readSubdivisionAlias(UResourceBundle* alias,
UniqueCharStrings* strings,
LocalMemory<const char*>& types,
LocalMemory<int32_t>& replacementIndexes,
int32_t &length, UErrorCode &status);
};

/**
Expand All @@ -647,6 +658,7 @@ class AliasData : public UMemory {
const CharStringMap& scriptMap() const { return script; }
const CharStringMap& territoryMap() const { return territory; }
const CharStringMap& variantMap() const { return variant; }
const CharStringMap& subdivisionMap() const { return subdivision; }

static void U_CALLCONV loadData(UErrorCode &status);
static UBool U_CALLCONV cleanup();
Expand All @@ -658,11 +670,13 @@ class AliasData : public UMemory {
CharStringMap scriptMap,
CharStringMap territoryMap,
CharStringMap variantMap,
CharStringMap subdivisionMap,
CharString* strings)
: language(std::move(languageMap)),
script(std::move(scriptMap)),
territory(std::move(territoryMap)),
variant(std::move(variantMap)),
subdivision(std::move(subdivisionMap)),
strings(strings) {
}

Expand All @@ -676,6 +690,7 @@ class AliasData : public UMemory {
CharStringMap script;
CharStringMap territory;
CharStringMap variant;
CharStringMap subdivision;
CharString* strings;

friend class AliasDataBuilder;
Expand Down Expand Up @@ -866,6 +881,34 @@ AliasDataBuilder::readVariantAlias(
status);
}

/**
 * Loads the subdivisionAlias entries of `alias` by delegating to readAlias().
 *
 * Outputs go to strings + types + replacementIndexes: `types` receives
 * `length` items holding the type field, and `replacementIndexes` receives
 * `length` items holding the index (into `strings`) of the replacement
 * subdivision for the matching type.
 *
 * In debug builds every type is asserted to be 3..8 characters long; the
 * replacement text is not validated here.
 */
void
AliasDataBuilder::readSubdivisionAlias(
        UResourceBundle* alias,
        UniqueCharStrings* strings,
        LocalMemory<const char*>& types,
        LocalMemory<int32_t>& replacementIndexes,
        int32_t &length,
        UErrorCode &status)
{
#if U_DEBUG
    // Debug-only sanity check on the alias key length.
    const auto verifyType = [](const char* type) {
        U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8);
    };
#else
    // The parameter is left unnamed so release builds do not warn about it.
    const auto verifyType = [](const char*) {};
#endif
    // No validation is applied to the replacement string.
    const auto verifyReplacement = [](const UnicodeString&) { };

    readAlias(alias, strings, types, replacementIndexes, length,
              verifyType, verifyReplacement, status);
}

/**
* Initializes the alias data from the ICU resource bundles. The alias data
* contains alias of language, country, script and variants.
Expand Down Expand Up @@ -905,12 +948,14 @@ AliasDataBuilder::build(UErrorCode &status) {
ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
LocalUResourceBundlePointer variantAlias(
ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
LocalUResourceBundlePointer subdivisionAlias(
ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status));

if (U_FAILURE(status)) {
return nullptr;
}
int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
variantLength = 0;
variantLength = 0, subdivisionLength = 0;

// Read the languageAlias into languageTypes, languageReplacementIndexes
// and strings
Expand Down Expand Up @@ -955,6 +1000,16 @@ AliasDataBuilder::build(UErrorCode &status) {
variantReplacementIndexes,
variantLength, status);

// Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
// and strings
LocalMemory<const char*> subdivisionTypes;
LocalMemory<int32_t> subdivisionReplacementIndexes;
readSubdivisionAlias(subdivisionAlias.getAlias(),
&strings,
subdivisionTypes,
subdivisionReplacementIndexes,
subdivisionLength, status);

if (U_FAILURE(status)) {
return nullptr;
}
Expand Down Expand Up @@ -994,6 +1049,14 @@ AliasDataBuilder::build(UErrorCode &status) {
status);
}

// Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
CharStringMap subdivisionMap(2, status);
for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
subdivisionMap.put(subdivisionTypes[i],
strings.get(subdivisionReplacementIndexes[i]),
status);
}

if (U_FAILURE(status)) {
return nullptr;
}
Expand All @@ -1004,6 +1067,7 @@ AliasDataBuilder::build(UErrorCode &status) {
std::move(scriptMap),
std::move(territoryMap),
std::move(variantMap),
std::move(subdivisionMap),
strings.orphanCharStrings());

if (data == nullptr) {
Expand Down Expand Up @@ -1105,6 +1169,9 @@ class AliasReplacer {

// Replace by using variantAlias.
bool replaceVariant(UErrorCode& status);

// Replace by using subdivisionAlias.
bool replaceSubdivision(CharString& subdivision, UErrorCode& status);
};

CharString&
Expand Down Expand Up @@ -1433,6 +1500,27 @@ AliasReplacer::replaceVariant(UErrorCode& status)
return false;
}

/**
 * Replace a subdivision code (the value of an "sd" or "rg" keyword) using
 * the subdivisionAlias data.
 *
 * @param subdivision In/out. The subdivision code to look up. It is
 *                    overwritten with the first space-separated token of the
 *                    replacement when, and only when, that token is 3..8
 *                    characters long.
 * @param status      ICU error code. Nothing is done if it already indicates
 *                    a failure on entry.
 * @return true only if `subdivision` was actually rewritten. A missing alias,
 *         or a replacement whose first token is outside 3..8 characters
 *         (e.g. the 2-letter case tracked by CLDR-14312), yields false and
 *         leaves `subdivision` untouched, so callers counting changes do not
 *         record a change that never happened.
 */
bool
AliasReplacer::replaceSubdivision(CharString& subdivision, UErrorCode& status)
{
    if (U_FAILURE(status)) {
        return false;
    }
    const char *replacement = data->subdivisionMap().get(subdivision.data());
    if (replacement == nullptr) {
        // No alias data for this subdivision.
        return false;
    }
    // The replacement may list several candidates separated by spaces;
    // only the first one is used.
    const char* firstSpace = uprv_strchr(replacement, ' ');
    const size_t len = (firstSpace != nullptr) ?
        static_cast<size_t>(firstSpace - replacement) : uprv_strlen(replacement);
    // Ignore len == 2, see CLDR-14312
    if (len < 3 || len > 8) {
        // Nothing is replaced, so do not report a change.
        return false;
    }
    subdivision.clear().append(replacement, static_cast<int32_t>(len), status);
    return true;
}

CharString&
AliasReplacer::outputToString(
CharString& out, UErrorCode status)
Expand Down Expand Up @@ -1495,7 +1583,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
region = nullptr;
}
const char* variantsStr = locale.getVariant();
const char* extensionsStr = locale_getKeywordsStart(locale.getName());
CharString variantsBuff(variantsStr, -1, status);
if (!variantsBuff.isEmpty()) {
if (U_FAILURE(status)) { return false; }
Expand Down Expand Up @@ -1559,11 +1646,42 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
if (U_FAILURE(status)) { return false; }
// Nothing changed, and we know the order of the variants is unchanged
// because we have no variant or only one.
if (changed == 0 && variants.size() <= 1) {
const char* extensionsStr = locale_getKeywordsStart(locale.getName());
if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
return false;
}
outputToString(out, status);
if (U_FAILURE(status)) {
return false;
}
if (extensionsStr != nullptr) {
changed = 0;
Locale temp(locale);
LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
if (U_SUCCESS(status) && !iter.isNull()) {
const char* key;
while ((key = iter->next(nullptr, status)) != nullptr) {
if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0) {
CharString value;
CharStringByteSink valueSink(&value);
locale.getKeywordValue(key, valueSink, status);
if (U_FAILURE(status)) {
status = U_ZERO_ERROR;
continue;
}
if (replaceSubdivision(value, status)) {
changed++;
}
temp.setKeywordValue(key, value.data(), status);
if (U_FAILURE(status)) {
return false;
}
}
}
}
if (changed != 0) {
extensionsStr = locale_getKeywordsStart(temp.getName());
}
out.append(extensionsStr, status);
}
if (U_FAILURE(status)) {
Expand All @@ -1572,7 +1690,6 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status)
// If the tag is not changed, return.
if (uprv_strcmp(out.data(), locale.getName()) == 0) {
U_ASSERT(changed == 0);
U_ASSERT(variants.size() > 1);
out.clear();
return false;
}
Expand Down
16 changes: 16 additions & 0 deletions icu4c/source/test/intltest/loctest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4916,8 +4916,24 @@ void LocaleTest::TestCanonicalize(void)
// ICU-21344
{ "ku-Arab-NT", "ku-Arab-IQ"},

// ICU-21402
{ "und-u-rg-no23", "und-u-rg-no50"},
{ "und-u-rg-cn11", "und-u-rg-cnbj"},
{ "und-u-rg-cz10a", "und-u-rg-cz110"},
{ "und-u-rg-fra", "und-u-rg-frges"},
{ "und-u-rg-frg", "und-u-rg-frges"},
{ "und-u-rg-lud", "und-u-rg-lucl"},

{ "und-NO-u-sd-no23", "und-NO-u-sd-no50"},
{ "und-CN-u-sd-cn11", "und-CN-u-sd-cnbj"},
{ "und-CZ-u-sd-cz10a", "und-CZ-u-sd-cz110"},
{ "und-FR-u-sd-fra", "und-FR-u-sd-frges"},
{ "und-FR-u-sd-frg", "und-FR-u-sd-frges"},
{ "und-LU-u-sd-lud", "und-LU-u-sd-lucl"},

// ICU-21401
{ "cel-gaulish", "xtg"},

};
int32_t i;
for (i=0; i < UPRV_LENGTHOF(testCases); i++) {
Expand Down
53 changes: 49 additions & 4 deletions icu4j/main/classes/core/src/com/ibm/icu/util/ULocale.java
Original file line number Diff line number Diff line change
Expand Up @@ -1268,12 +1268,33 @@ public String replace() {
// Nothing changed in this iteration, break out the loop
break;
} // while(1)
if (changed) {
String result = lscvToID(language, script, region,
if (extensions == null && !changed) {
return null;
}
String result = lscvToID(language, script, region,
((variants == null) ? "" : Utility.joinStrings("_", variants)));
if (extensions != null) {
result += extensions;
if (extensions != null) {
boolean keywordChanged = false;
ULocale temp = new ULocale(result + extensions);
Iterator<String> keywords = temp.getKeywords();
while (keywords != null && keywords.hasNext()) {
String key = keywords.next();
if (key.equals("rg") || key.equals("sd")) {
String value = temp.getKeywordValue(key);
String replacement = replaceSubdivision(value);
if (replacement != null) {
temp = temp.setKeywordValue(key, replacement);
keywordChanged = true;
}
}
}
if (keywordChanged) {
extensions = temp.getName().substring(temp.getBaseName().length());
changed = true;
}
result += extensions;
}
if (changed) {
return result;
}
// Nothing changed in any iteration of the loop.
Expand All @@ -1285,6 +1306,7 @@ public String replace() {
private static Map<String, String> scriptAliasMap = null;
private static Map<String, List<String>> territoryAliasMap = null;
private static Map<String, String> variantAliasMap = null;
private static Map<String, String> subdivisionAliasMap = null;

/*
* Initializes the alias data from the ICU resource bundles. The alias
Expand All @@ -1302,6 +1324,7 @@ private static synchronized void loadAliasData() {
scriptAliasMap = new HashMap<>();
territoryAliasMap = new HashMap<>();
variantAliasMap = new HashMap<>();
subdivisionAliasMap = new HashMap<>();

UResourceBundle metadata = UResourceBundle.getBundleInstance(
ICUData.ICU_BASE_NAME, "metadata",
Expand All @@ -1311,6 +1334,7 @@ private static synchronized void loadAliasData() {
UResourceBundle scriptAlias = metadataAlias.get("script");
UResourceBundle territoryAlias = metadataAlias.get("territory");
UResourceBundle variantAlias = metadataAlias.get("variant");
UResourceBundle subdivisionAlias = metadataAlias.get("subdivision");

for (int i = 0 ; i < languageAlias.getSize(); i++) {
UResourceBundle res = languageAlias.get(i);
Expand Down Expand Up @@ -1369,6 +1393,22 @@ private static synchronized void loadAliasData() {
}
variantAliasMap.put(aliasFrom, aliasTo);
}
// Fill subdivisionAliasMap from the "subdivision" alias table: each entry's
// key is the deprecated code and the first space-separated token of its
// "replacement" string is the new code.
for (int i = 0 ; i < subdivisionAlias.getSize(); i++) {
    UResourceBundle res = subdivisionAlias.get(i);
    String aliasFrom = res.getKey();
    // Only the first of possibly several space-separated replacements is kept.
    String aliasTo = res.get("replacement").getString().split(" ")[0];
    if (aliasFrom.length() < 3 || aliasFrom.length() > 8) {
        throw new IllegalArgumentException(
            "Incorrect key [" + aliasFrom + "] in alias:subdivision.");
    }
    if (aliasTo.length() < 3 || aliasTo.length() > 8) {
        // Ignore replacement < 3 for now. see CLDR-14312
        // throw new IllegalArgumentException(
        //     "Incorrect value [" + aliasTo + "] in alias:subdivision.");
        continue;
    }
    subdivisionAliasMap.put(aliasFrom, aliasTo);
}

aliasDataIsLoaded = true;
}
Expand Down Expand Up @@ -1591,6 +1631,11 @@ private boolean replaceVariant() {
}
return false;
}

/**
 * Looks up a subdivision code in subdivisionAliasMap.
 *
 * @param subdivision the subdivision code to look up
 * @return the value stored for that code, or null when the map has no
 *         entry for it
 */
private String replaceSubdivision(String subdivision) {
    final String mapped = subdivisionAliasMap.get(subdivision);
    return mapped;
}

};

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5216,6 +5216,21 @@ public void TestCanonical() {
// ICU-21344
Assert.assertEquals("ku-Arab-IQ", canonicalTag("ku-Arab-NT"));

// ICU-21402
Assert.assertEquals("und-u-rg-no50", canonicalTag("und-u-rg-no23"));
Assert.assertEquals("und-u-rg-cnbj", canonicalTag("und-u-rg-cn11"));
Assert.assertEquals("und-u-rg-cz110", canonicalTag("und-u-rg-cz10a"));
Assert.assertEquals("und-u-rg-frges", canonicalTag("und-u-rg-fra"));
Assert.assertEquals("und-u-rg-frges", canonicalTag("und-u-rg-frg"));
Assert.assertEquals("und-u-rg-lucl", canonicalTag("und-u-rg-lud"));

Assert.assertEquals("und-NO-u-sd-no50", canonicalTag("und-NO-u-sd-no23"));
Assert.assertEquals("und-CN-u-sd-cnbj", canonicalTag("und-CN-u-sd-cn11"));
Assert.assertEquals("und-CZ-u-sd-cz110", canonicalTag("und-CZ-u-sd-cz10a"));
Assert.assertEquals("und-FR-u-sd-frges", canonicalTag("und-FR-u-sd-fra"));
Assert.assertEquals("und-FR-u-sd-frges", canonicalTag("und-FR-u-sd-frg"));
Assert.assertEquals("und-LU-u-sd-lucl", canonicalTag("und-LU-u-sd-lud"));

// ICU-21401
Assert.assertEquals("xtg", canonicalTag("cel-gaulish"));
}
Expand Down