diff --git a/scripts/generate_unicode_data.cr b/scripts/generate_unicode_data.cr index 58c385217d5d..a807c750253a 100644 --- a/scripts/generate_unicode_data.cr +++ b/scripts/generate_unicode_data.cr @@ -69,7 +69,7 @@ def alternate_ranges(ranges) # Continue streak else if first_codepoint - alternate << AlternateRange.new(first_codepoint, last_codepoint.not_nil!) + alternate << new_alternate_range(first_codepoint, last_codepoint) end first_codepoint = codepoint end @@ -78,12 +78,18 @@ def alternate_ranges(ranges) end if first_codepoint - alternate << AlternateRange.new(first_codepoint, last_codepoint.not_nil!) + alternate << new_alternate_range(first_codepoint, last_codepoint) end alternate end +def new_alternate_range(first_codepoint, last_codepoint) + # The last codepoint is the one for the uppercase letter and we + # need to also consider the next codepoint for the lowercase one. + AlternateRange.new(first_codepoint, last_codepoint.not_nil! + 1) +end + def strides(entries, targets) strides = [] of Stride diff --git a/spec/std/string_spec.cr b/spec/std/string_spec.cr index 73fe7929edde..80dcd6f281a0 100644 --- a/spec/std/string_spec.cr +++ b/spec/std/string_spec.cr @@ -534,6 +534,7 @@ describe "String" do it { "aeiou".upcase(Unicode::CaseOptions::Turkic).should eq("AEİOU") } it { "baffle".upcase.should eq("BAFFLE") } it { "ff".upcase.should eq("FF") } + it { "ňž".upcase.should eq("ŇŽ") } # #7922 end describe "capitalize" do diff --git a/src/unicode/data.cr b/src/unicode/data.cr index d0e2e3098267..d5d27eb27b61 100644 --- a/src/unicode/data.cr +++ b/src/unicode/data.cr @@ -293,63 +293,63 @@ module Unicode private def self.alternate_ranges @@alternate_ranges ||= begin data = Array({Int32, Int32}).new(57) - put(data, 256, 302) - put(data, 306, 310) - put(data, 313, 327) - put(data, 330, 374) - put(data, 377, 381) - put(data, 386, 388) - put(data, 391, 391) - put(data, 395, 395) - put(data, 401, 401) - put(data, 408, 408) - put(data, 416, 420) - put(data, 423, 423) - put(data, 428, 428) - put(data, 431, 431) - put(data, 435, 437) - put(data, 440, 440) - put(data, 444, 444) - put(data, 453, 453) - put(data, 456, 456) - put(data, 459, 475) - put(data, 478, 494) - put(data, 498, 500) - put(data, 504, 542) - put(data, 546, 562) - put(data, 571, 571) - put(data, 577, 577) - put(data, 582, 590) - put(data, 880, 882) - put(data, 886, 886) - put(data, 984, 1006) - put(data, 1015, 1015) - put(data, 1018, 1018) - put(data, 1120, 1152) - put(data, 1162, 1214) - put(data, 1217, 1229) - put(data, 1232, 1326) - put(data, 7680, 7828) - put(data, 7840, 7934) - put(data, 8579, 8579) - put(data, 11360, 11360) - put(data, 11367, 11371) - put(data, 11378, 11378) - put(data, 11381, 11381) - put(data, 11392, 11490) - put(data, 11499, 11501) - put(data, 11506, 11506) - put(data, 42560, 42604) - put(data, 42624, 42650) - put(data, 42786, 42798) - put(data, 42802, 42862) - put(data, 42873, 42875) - put(data, 42878, 42886) - put(data, 42891, 42891) - put(data, 42896, 42898) - put(data, 42902, 42920) - put(data, 42932, 42942) - put(data, 42946, 42946) + put(data, 256, 303) + put(data, 306, 311) + put(data, 313, 328) + put(data, 330, 375) + put(data, 377, 382) + put(data, 386, 389) + put(data, 391, 392) + put(data, 395, 396) + put(data, 401, 402) + put(data, 408, 409) + put(data, 416, 421) + put(data, 423, 424) + put(data, 428, 429) + put(data, 431, 432) + put(data, 435, 438) + put(data, 440, 441) + put(data, 444, 445) + put(data, 453, 454) + put(data, 456, 457) + put(data, 459, 476) + put(data, 478, 495) + put(data, 498, 501) + put(data, 504, 543) + put(data, 546, 563) + put(data, 571, 572) + put(data, 577, 578) + put(data, 582, 591) + put(data, 880, 883) + put(data, 886, 887) + put(data, 984, 1007) + put(data, 1015, 1016) + put(data, 1018, 1019) + put(data, 1120, 1153) + put(data, 1162, 1215) + put(data, 1217, 1230) + put(data, 1232, 1327) + put(data, 7680, 7829) + put(data, 7840, 7935) + put(data, 8579, 8580) + put(data, 11360, 11361) + put(data, 11367, 11372) + put(data, 11378, 11379) + put(data, 11381, 11382) + put(data, 11392, 11491) + put(data, 11499, 11502) + put(data, 11506, 11507) + put(data, 42560, 42605) + put(data, 42624, 42651) + put(data, 42786, 42799) + put(data, 42802, 42863) + put(data, 42873, 42876) + put(data, 42878, 42887) + put(data, 42891, 42892) + put(data, 42896, 42899) + put(data, 42902, 42921) + put(data, 42932, 42943) + put(data, 42946, 42947) data end end