Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize String#compare in case of ASCII only #7352

Merged
merged 1 commit into from
Apr 6, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 32 additions & 16 deletions spec/std/string_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -2245,22 +2245,38 @@ describe "String" do
end
end

it "compares non-case insensitive" do
"fo".compare("foo").should eq(-1)
"foo".compare("fo").should eq(1)
"foo".compare("foo").should eq(0)
"foo".compare("fox").should eq(-1)
"fox".compare("foo").should eq(1)
"foo".compare("Foo").should eq(1)
end

it "compares case insensitive" do
"fo".compare("FOO", case_insensitive: true).should eq(-1)
"foo".compare("FO", case_insensitive: true).should eq(1)
"foo".compare("FOO", case_insensitive: true).should eq(0)
"foo".compare("FOX", case_insensitive: true).should eq(-1)
"fox".compare("FOO", case_insensitive: true).should eq(1)
"fo\u{0000}".compare("FO", case_insensitive: true).should eq(1)
describe "compare" do
it "compares case-sensitive" do
"fo".compare("foo").should eq(-1)
"foo".compare("fo").should eq(1)
"foo".compare("foo").should eq(0)
"foo".compare("fox").should eq(-1)
"fox".compare("foo").should eq(1)
"foo".compare("Foo").should eq(1)
"hällo".compare("Hällo").should eq(1)
"".compare("").should eq(0)
end

it "compares case-insensitive" do
"foo".compare("FO", case_insensitive: true).should eq(1)
"FOO".compare("fo", case_insensitive: true).should eq(1)
"fo".compare("FOO", case_insensitive: true).should eq(-1)
"FOX".compare("foo", case_insensitive: true).should eq(1)
"foo".compare("FOX", case_insensitive: true).should eq(-1)
"foo".compare("FOO", case_insensitive: true).should eq(0)
"hELLo".compare("HellO", case_insensitive: true).should eq(0)
"fo\u{0}".compare("FO", case_insensitive: true).should eq(1)
"fo".compare("FO\u{0}", case_insensitive: true).should eq(-1)
"\u{0}".compare("\u{0}", case_insensitive: true).should eq(0)
"z".compare("hello", case_insensitive: true).should eq(1)
"h".compare("zzz", case_insensitive: true).should eq(-1)
"ä".compare("äA", case_insensitive: true).should eq(-1)
"äÄ".compare("äÄ", case_insensitive: true).should eq(0)
"heIIo".compare("heııo", case_insensitive: true, options: Unicode::CaseOptions::Turkic).should eq(0)
"".compare("abc", case_insensitive: true).should eq(-1)
"abc".compare("", case_insensitive: true).should eq(1)
"abcA".compare("abca", case_insensitive: true).should eq(0)
end
end

it "builds with write_byte" do
Expand Down
62 changes: 42 additions & 20 deletions src/string.cr
Original file line number Diff line number Diff line change
Expand Up @@ -2410,10 +2410,6 @@ class String
# this string is less, equal or greater than *other*, optionally in a *case_insensitive*
# manner.
#
# If *case_insitive* is `false`, this method delegates to `<=>`. Otherwise,
straight-shoota marked this conversation as resolved.
Show resolved Hide resolved
# the strings are compared char-by-char, and ASCII characters are compared in a
# case-insensitive way.
#
# ```
# "abcdef".compare("abcde") # => 1
# "abcdef".compare("abcdef") # => 0
Expand All @@ -2422,29 +2418,55 @@ class String
#
# "abcdef".compare("ABCDEF", case_insensitive: true) # => 0
# "abcdef".compare("ABCDEG", case_insensitive: true) # => -1
#
# "heIIo".compare("heııo", case_insensitive: true, Unicode::CaseOptions::Turkic) # => 0
# ```
def compare(other : String, case_insensitive = false)
def compare(other : String, case_insensitive = false, options = Unicode::CaseOptions::None)
return self <=> other unless case_insensitive

reader1 = Char::Reader.new(self)
reader2 = Char::Reader.new(other)
ch1 = reader1.current_char
ch2 = reader2.current_char
if ascii_only? && other.ascii_only?
position = 0

while reader1.has_next? && reader2.has_next?
cmp = ch1.downcase <=> ch2.downcase
return cmp.sign if cmp != 0
while position < bytesize && position < other.bytesize
byte1 = to_unsafe[position]
byte2 = other.to_unsafe[position]

ch1 = reader1.next_char
ch2 = reader2.next_char
end
# Lowercase both bytes
if 65 <= byte1 <= 90
byte1 += 32
end
if 65 <= byte2 <= 90
byte2 += 32
end

comparison = byte1 <=> byte2
return comparison unless comparison == 0

position += 1
end

if reader1.has_next?
1
elsif reader2.has_next?
-1
bytesize <=> other.bytesize
else
0
reader1 = Char::Reader.new(self)
reader2 = Char::Reader.new(other)
char1 = reader1.current_char
char2 = reader2.current_char

while reader1.has_next? && reader2.has_next?
comparison = char1.downcase(options) <=> char2.downcase(options)
return comparison.sign unless comparison == 0

char1 = reader1.next_char
char2 = reader2.next_char
end

if reader1.has_next?
wooster0 marked this conversation as resolved.
Show resolved Hide resolved
1
elsif reader2.has_next?
-1
else
0
end
end
end

Expand Down