Skip to content

Commit

Permalink
Fix unicode alternate ranges generation (crystal-lang#7924)
Browse files Browse the repository at this point in the history
* Fix unicode alternate ranges generation

The last codepoint in a range is the one for an uppercase letter, and
the next codepoint is the one for its lowercase counterpart, so it should
also be considered part of the alternating range.

* Regenerate unicode data

* Add test for crystal-lang#7922
  • Loading branch information
asterite authored and dnamsons committed Jan 10, 2020
1 parent 4d0cbc7 commit 36de2d6
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 59 deletions.
10 changes: 8 additions & 2 deletions scripts/generate_unicode_data.cr
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def alternate_ranges(ranges)
# Continue streak
else
if first_codepoint
alternate << AlternateRange.new(first_codepoint, last_codepoint.not_nil!)
alternate << new_alternate_range(first_codepoint, last_codepoint)
end
first_codepoint = codepoint
end
Expand All @@ -78,12 +78,18 @@ def alternate_ranges(ranges)
end

if first_codepoint
alternate << AlternateRange.new(first_codepoint, last_codepoint.not_nil!)
alternate << new_alternate_range(first_codepoint, last_codepoint)
end

alternate
end

# Builds the AlternateRange for a streak that starts at `first_codepoint`
# and whose last recorded codepoint is `last_codepoint` (must not be nil).
def new_alternate_range(first_codepoint, last_codepoint)
  # `last_codepoint` is the final uppercase letter of the streak; its
  # lowercase partner is the very next codepoint, so the upper bound is
  # moved one past it to keep that partner inside the range.
  upper_bound = last_codepoint.not_nil! + 1
  AlternateRange.new(first_codepoint, upper_bound)
end

def strides(entries, targets)
strides = [] of Stride

Expand Down
1 change: 1 addition & 0 deletions spec/std/string_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ describe "String" do
it { "aeiou".upcase(Unicode::CaseOptions::Turkic).should eq("AEİOU") }
it { "baffle".upcase.should eq("BAFFLE") }
it { "ﬀ".upcase.should eq("FF") }
it { "ňž".upcase.should eq("ŇŽ") } # #7922
end

describe "capitalize" do
Expand Down
114 changes: 57 additions & 57 deletions src/unicode/data.cr
Original file line number Diff line number Diff line change
Expand Up @@ -293,63 +293,63 @@ module Unicode
private def self.alternate_ranges
@@alternate_ranges ||= begin
data = Array({Int32, Int32}).new(57)
put(data, 256, 302)
put(data, 306, 310)
put(data, 313, 327)
put(data, 330, 374)
put(data, 377, 381)
put(data, 386, 388)
put(data, 391, 391)
put(data, 395, 395)
put(data, 401, 401)
put(data, 408, 408)
put(data, 416, 420)
put(data, 423, 423)
put(data, 428, 428)
put(data, 431, 431)
put(data, 435, 437)
put(data, 440, 440)
put(data, 444, 444)
put(data, 453, 453)
put(data, 456, 456)
put(data, 459, 475)
put(data, 478, 494)
put(data, 498, 500)
put(data, 504, 542)
put(data, 546, 562)
put(data, 571, 571)
put(data, 577, 577)
put(data, 582, 590)
put(data, 880, 882)
put(data, 886, 886)
put(data, 984, 1006)
put(data, 1015, 1015)
put(data, 1018, 1018)
put(data, 1120, 1152)
put(data, 1162, 1214)
put(data, 1217, 1229)
put(data, 1232, 1326)
put(data, 7680, 7828)
put(data, 7840, 7934)
put(data, 8579, 8579)
put(data, 11360, 11360)
put(data, 11367, 11371)
put(data, 11378, 11378)
put(data, 11381, 11381)
put(data, 11392, 11490)
put(data, 11499, 11501)
put(data, 11506, 11506)
put(data, 42560, 42604)
put(data, 42624, 42650)
put(data, 42786, 42798)
put(data, 42802, 42862)
put(data, 42873, 42875)
put(data, 42878, 42886)
put(data, 42891, 42891)
put(data, 42896, 42898)
put(data, 42902, 42920)
put(data, 42932, 42942)
put(data, 42946, 42946)
put(data, 256, 303)
put(data, 306, 311)
put(data, 313, 328)
put(data, 330, 375)
put(data, 377, 382)
put(data, 386, 389)
put(data, 391, 392)
put(data, 395, 396)
put(data, 401, 402)
put(data, 408, 409)
put(data, 416, 421)
put(data, 423, 424)
put(data, 428, 429)
put(data, 431, 432)
put(data, 435, 438)
put(data, 440, 441)
put(data, 444, 445)
put(data, 453, 454)
put(data, 456, 457)
put(data, 459, 476)
put(data, 478, 495)
put(data, 498, 501)
put(data, 504, 543)
put(data, 546, 563)
put(data, 571, 572)
put(data, 577, 578)
put(data, 582, 591)
put(data, 880, 883)
put(data, 886, 887)
put(data, 984, 1007)
put(data, 1015, 1016)
put(data, 1018, 1019)
put(data, 1120, 1153)
put(data, 1162, 1215)
put(data, 1217, 1230)
put(data, 1232, 1327)
put(data, 7680, 7829)
put(data, 7840, 7935)
put(data, 8579, 8580)
put(data, 11360, 11361)
put(data, 11367, 11372)
put(data, 11378, 11379)
put(data, 11381, 11382)
put(data, 11392, 11491)
put(data, 11499, 11502)
put(data, 11506, 11507)
put(data, 42560, 42605)
put(data, 42624, 42651)
put(data, 42786, 42799)
put(data, 42802, 42863)
put(data, 42873, 42876)
put(data, 42878, 42887)
put(data, 42891, 42892)
put(data, 42896, 42899)
put(data, 42902, 42921)
put(data, 42932, 42943)
put(data, 42946, 42947)
data
end
end
Expand Down

0 comments on commit 36de2d6

Please sign in to comment.