From f61afca262d3a0aa6a8a501db0b1936c60858e35 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 4 Feb 2025 17:22:05 +0200 Subject: [PATCH] gh-129646: Update the locale alias mapping (#129647) * gh-129646: Update the locale alias mapping * Add a NEWS file. --- Lib/locale.py | 52 +++++++++++++++---- ...-02-04-15-16-33.gh-issue-129646.sapk1F.rst | 2 + 2 files changed, 44 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-02-04-15-16-33.gh-issue-129646.sapk1F.rst diff --git a/Lib/locale.py b/Lib/locale.py index d8c09f1123d318..213d5e93418cfb 100644 --- a/Lib/locale.py +++ b/Lib/locale.py @@ -860,6 +860,24 @@ def getpreferredencoding(do_setlocale=True): # updated 'ca_es@valencia' -> 'ca_ES.ISO8859-15@valencia' to 'ca_ES.UTF-8@valencia' # updated 'kk_kz' -> 'kk_KZ.RK1048' to 'kk_KZ.ptcp154' # updated 'russian' -> 'ru_RU.ISO8859-5' to 'ru_RU.KOI8-R' +# +# SS 2025-02-04: +# Updated alias mapping with glibc 2.41 supported locales and the latest +# X lib alias mapping. +# +# These are the differences compared to the old mapping (Python 3.13.1 +# and older): +# +# updated 'c.utf8' -> 'C.UTF-8' to 'en_US.UTF-8' +# updated 'de_it' -> 'de_IT.ISO8859-1' to 'de_IT.UTF-8' +# removed 'de_li.utf8' +# updated 'en_il' -> 'en_IL.UTF-8' to 'en_IL.ISO8859-1' +# removed 'english.iso88591' +# updated 'es_cu' -> 'es_CU.UTF-8' to 'es_CU.ISO8859-1' +# updated 'russian' -> 'ru_RU.KOI8-R' to 'ru_RU.ISO8859-5' +# updated 'sr@latn' -> 'sr_CS.UTF-8@latin' to 'sr_RS.UTF-8@latin' +# removed 'univ' +# removed 'universal' locale_alias = { 'a3': 'az_AZ.KOI8-C', @@ -939,7 +957,7 @@ def getpreferredencoding(do_setlocale=True): 'c.ascii': 'C', 'c.en': 'C', 'c.iso88591': 'en_US.ISO8859-1', - 'c.utf8': 'C.UTF-8', + 'c.utf8': 'en_US.UTF-8', 'c_c': 'C', 'c_c.c': 'C', 'ca': 'ca_ES.ISO8859-1', @@ -956,6 +974,7 @@ def getpreferredencoding(do_setlocale=True): 'chr_us': 'chr_US.UTF-8', 'ckb_iq': 'ckb_IQ.UTF-8', 'cmn_tw': 'cmn_TW.UTF-8', + 'crh_ru': 'crh_RU.UTF-8', 'crh_ua': 'crh_UA.UTF-8', 'croatian': 'hr_HR.ISO8859-2', 'cs': 'cs_CZ.ISO8859-2', @@ -977,11 +996,12 @@ def getpreferredencoding(do_setlocale=True): 'de_be': 'de_BE.ISO8859-1', 'de_ch': 'de_CH.ISO8859-1', 'de_de': 'de_DE.ISO8859-1', - 'de_it': 'de_IT.ISO8859-1', - 'de_li.utf8': 'de_LI.UTF-8', + 'de_it': 'de_IT.UTF-8', + 'de_li': 'de_LI.ISO8859-1', 'de_lu': 'de_LU.ISO8859-1', 'deutsch': 'de_DE.ISO8859-1', 'doi_in': 'doi_IN.UTF-8', + 'dsb_de': 'dsb_DE.UTF-8', 'dutch': 'nl_NL.ISO8859-1', 'dutch.iso88591': 'nl_BE.ISO8859-1', 'dv_mv': 'dv_MV.UTF-8', @@ -1004,7 +1024,7 @@ def getpreferredencoding(do_setlocale=True): 'en_gb': 'en_GB.ISO8859-1', 'en_hk': 'en_HK.ISO8859-1', 'en_ie': 'en_IE.ISO8859-1', - 'en_il': 'en_IL.UTF-8', + 'en_il': 'en_IL.ISO8859-1', 'en_in': 'en_IN.ISO8859-1', 'en_ng': 'en_NG.UTF-8', 'en_nz': 'en_NZ.ISO8859-1', @@ -1020,7 +1040,6 @@ def getpreferredencoding(do_setlocale=True): 'en_zw.utf8': 'en_ZS.UTF-8', 'eng_gb': 'en_GB.ISO8859-1', 'english': 'en_EN.ISO8859-1', - 'english.iso88591': 'en_US.ISO8859-1', 'english_uk': 'en_GB.ISO8859-1', 'english_united-states': 'en_US.ISO8859-1', 'english_united-states.437': 'C', @@ -1036,7 +1055,7 @@ def getpreferredencoding(do_setlocale=True): 'es_cl': 'es_CL.ISO8859-1', 'es_co': 'es_CO.ISO8859-1', 'es_cr': 'es_CR.ISO8859-1', - 'es_cu': 'es_CU.UTF-8', + 'es_cu': 'es_CU.ISO8859-1', 'es_do': 'es_DO.ISO8859-1', 'es_ec': 'es_EC.ISO8859-1', 'es_es': 'es_ES.ISO8859-1', @@ -1086,6 +1105,7 @@ def getpreferredencoding(do_setlocale=True): 'ga_ie': 'ga_IE.ISO8859-1', 'galego': 'gl_ES.ISO8859-1', 'galician': 'gl_ES.ISO8859-1', + 'gbm_in': 'gbm_IN.UTF-8', 'gd': 'gd_GB.ISO8859-1', 'gd_gb': 'gd_GB.ISO8859-1', 'ger_de': 'de_DE.ISO8859-1', @@ -1126,6 +1146,7 @@ def getpreferredencoding(do_setlocale=True): 'icelandic': 'is_IS.ISO8859-1', 'id': 'id_ID.ISO8859-1', 'id_id': 'id_ID.ISO8859-1', + 'ie': 'ie.UTF-8', 'ig_ng': 'ig_NG.UTF-8', 'ik_ca': 'ik_CA.UTF-8', 'in': 'id_ID.ISO8859-1', @@ -1180,6 +1201,7 @@ def getpreferredencoding(do_setlocale=True): 'ks_in': 'ks_IN.UTF-8', 'ks_in@devanagari.utf8': 'ks_IN.UTF-8@devanagari', 'ku_tr': 'ku_TR.ISO8859-9', + 'kv_ru': 'kv_RU.UTF-8', 'kw': 'kw_GB.ISO8859-1', 'kw_gb': 'kw_GB.ISO8859-1', 'ky': 'ky_KG.UTF-8', @@ -1198,6 +1220,7 @@ def getpreferredencoding(do_setlocale=True): 'lo_la.mulelao1': 'lo_LA.MULELAO-1', 'lt': 'lt_LT.ISO8859-13', 'lt_lt': 'lt_LT.ISO8859-13', + 'ltg_lv.utf8': 'ltg_LV.UTF-8', 'lv': 'lv_LV.ISO8859-13', 'lv_lv': 'lv_LV.ISO8859-13', 'lzh_tw': 'lzh_TW.UTF-8', @@ -1205,6 +1228,7 @@ def getpreferredencoding(do_setlocale=True): 'mai': 'mai_IN.UTF-8', 'mai_in': 'mai_IN.UTF-8', 'mai_np': 'mai_NP.UTF-8', + 'mdf_ru': 'mdf_RU.UTF-8', 'mfe_mu': 'mfe_MU.UTF-8', 'mg_mg': 'mg_MG.ISO8859-15', 'mhr_ru': 'mhr_RU.UTF-8', @@ -1218,6 +1242,7 @@ def getpreferredencoding(do_setlocale=True): 'ml_in': 'ml_IN.UTF-8', 'mn_mn': 'mn_MN.UTF-8', 'mni_in': 'mni_IN.UTF-8', + 'mnw_mm': 'mnw_MM.UTF-8', 'mr': 'mr_IN.UTF-8', 'mr_in': 'mr_IN.UTF-8', 'ms': 'ms_MY.ISO8859-1', @@ -1286,6 +1311,7 @@ def getpreferredencoding(do_setlocale=True): 'pt_pt': 'pt_PT.ISO8859-1', 'quz_pe': 'quz_PE.UTF-8', 'raj_in': 'raj_IN.UTF-8', + 'rif_ma': 'rif_MA.UTF-8', 'ro': 'ro_RO.ISO8859-2', 'ro_ro': 'ro_RO.ISO8859-2', 'romanian': 'ro_RO.ISO8859-2', @@ -1293,12 +1319,14 @@ def getpreferredencoding(do_setlocale=True): 'ru_ru': 'ru_RU.UTF-8', 'ru_ua': 'ru_UA.KOI8-U', 'rumanian': 'ro_RO.ISO8859-2', - 'russian': 'ru_RU.KOI8-R', + 'russian': 'ru_RU.ISO8859-5', 'rw': 'rw_RW.ISO8859-1', 'rw_rw': 'rw_RW.ISO8859-1', 'sa_in': 'sa_IN.UTF-8', + 'sah_ru': 'sah_RU.UTF-8', 'sat_in': 'sat_IN.UTF-8', 'sc_it': 'sc_IT.UTF-8', + 'scn_it': 'scn_IT.UTF-8', 'sd': 'sd_IN.UTF-8', 'sd_in': 'sd_IN.UTF-8', 'sd_in@devanagari.utf8': 'sd_IN.UTF-8@devanagari', @@ -1340,7 +1368,7 @@ def getpreferredencoding(do_setlocale=True): 'sq_mk': 'sq_MK.UTF-8', 'sr': 'sr_RS.UTF-8', 'sr@cyrillic': 'sr_RS.UTF-8', - 'sr@latn': 'sr_CS.UTF-8@latin', + 'sr@latn': 'sr_RS.UTF-8@latin', 'sr_cs': 'sr_CS.UTF-8', 'sr_cs.iso88592@latn': 'sr_CS.ISO8859-2', 'sr_cs@latn': 'sr_CS.UTF-8@latin', @@ -1359,14 +1387,17 @@ def getpreferredencoding(do_setlocale=True): 'sr_yu@cyrillic': 'sr_RS.UTF-8', 'ss': 'ss_ZA.ISO8859-1', 'ss_za': 'ss_ZA.ISO8859-1', + 'ssy_er': 'ssy_ER.UTF-8', 'st': 'st_ZA.ISO8859-1', 'st_za': 'st_ZA.ISO8859-1', + 'su_id': 'su_ID.UTF-8', 'sv': 'sv_SE.ISO8859-1', 'sv_fi': 'sv_FI.ISO8859-1', 'sv_se': 'sv_SE.ISO8859-1', 'sw_ke': 'sw_KE.UTF-8', 'sw_tz': 'sw_TZ.UTF-8', 'swedish': 'sv_SE.ISO8859-1', + 'syr': 'syr.UTF-8', 'szl_pl': 'szl_PL.UTF-8', 'ta': 'ta_IN.TSCII-0', 'ta_in': 'ta_IN.TSCII-0', @@ -1393,6 +1424,7 @@ def getpreferredencoding(do_setlocale=True): 'tn': 'tn_ZA.ISO8859-15', 'tn_za': 'tn_ZA.ISO8859-15', 'to_to': 'to_TO.UTF-8', + 'tok': 'tok.UTF-8', 'tpi_pg': 'tpi_PG.UTF-8', 'tr': 'tr_TR.ISO8859-9', 'tr_cy': 'tr_CY.ISO8859-9', @@ -1407,8 +1439,7 @@ def getpreferredencoding(do_setlocale=True): 'ug_cn': 'ug_CN.UTF-8', 'uk': 'uk_UA.KOI8-U', 'uk_ua': 'uk_UA.KOI8-U', - 'univ': 'en_US.utf', - 'universal': 'en_US.utf', + 'univ.utf8': 'en_US.UTF-8', 'universal.utf8@ucs4': 'en_US.UTF-8', 'unm_us': 'unm_US.UTF-8', 'ur': 'ur_PK.CP1256', @@ -1437,6 +1468,7 @@ def getpreferredencoding(do_setlocale=True): 'yo_ng': 'yo_NG.UTF-8', 'yue_hk': 'yue_HK.UTF-8', 'yuw_pg': 'yuw_PG.UTF-8', + 'zgh_ma': 'zgh_MA.UTF-8', 'zh': 'zh_CN.eucCN', 'zh_cn': 'zh_CN.gb2312', 'zh_cn.big5': 'zh_TW.big5', diff --git a/Misc/NEWS.d/next/Library/2025-02-04-15-16-33.gh-issue-129646.sapk1F.rst b/Misc/NEWS.d/next/Library/2025-02-04-15-16-33.gh-issue-129646.sapk1F.rst new file mode 100644 index 00000000000000..742d1d60dfd1bc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-02-04-15-16-33.gh-issue-129646.sapk1F.rst @@ -0,0 +1,2 @@ +Update the locale alias mapping in the :mod:`locale` module to match the +latest X Org locale alias mapping and support new locales in Glibc 2.41.