Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize Indexable#join when all elements are strings #6635

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions spec/std/char_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -261,14 +261,13 @@ describe "Char" do
describe "+" do
it "does for both ascii" do
str = 'f' + "oo"
str.bytesize.should eq(3)
str.@length.should eq(3)
str.@length.should eq(3) # Check that it was precomputed
str.should eq("foo")
end

it "does for both unicode" do
str = '青' + "旅路"
str.@length.should eq(3)
str.@length.should eq(3) # Check that it was precomputed
str.should eq("青旅路")
end
end
Expand Down
52 changes: 52 additions & 0 deletions spec/std/indexable_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,34 @@ private class SafeIndexable
end
end

private class SafeStringIndexable
include Indexable(String)

property size

def initialize(@size : Int32)
end

def unsafe_at(i)
raise IndexError.new unless 0 <= i < size
i.to_s
end
end

private class SafeMixedIndexable
include Indexable(String | Int32)

property size

def initialize(@size : Int32)
end

def unsafe_at(i)
raise IndexError.new unless 0 <= i < size
i.to_s
end
end

describe Indexable do
it "does index with big negative offset" do
indexable = SafeIndexable.new(3)
Expand Down Expand Up @@ -114,4 +142,28 @@ describe Indexable do
return_value.should eq(indexable)
last_element.should eq(3)
end

it "joins strings (empty case)" do
indexable = SafeStringIndexable.new(0)
indexable.join.should eq("")
indexable.join(", ").should eq("")
end

it "joins strings (non-empty case)" do
indexable = SafeStringIndexable.new(12)
indexable.join(", ").should eq("0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11")
indexable.join(98).should eq("098198298398498598698798898998109811")
end

it "joins non-strings" do
indexable = SafeIndexable.new(12)
indexable.join(", ").should eq("0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11")
indexable.join(98).should eq("098198298398498598698798898998109811")
end

it "joins when T has String" do
indexable = SafeMixedIndexable.new(12)
indexable.join(", ").should eq("0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11")
indexable.join(98).should eq("098198298398498598698798898998109811")
end
end
2 changes: 0 additions & 2 deletions spec/std/string_builder_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ describe String::Builder do
builder << 456
end
str.should eq("123456")
str.size.should eq(6)
str.bytesize.should eq(6)
end

it "raises if invokes to_s twice" do
Expand Down
102 changes: 22 additions & 80 deletions spec/std/string_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -996,56 +996,37 @@ describe "String" do
it { "hello world\\r\\n".delete("X-\\w").should eq("hello orldrn") }

it "deletes one char" do
deleted = "foobar".delete('o')
deleted.bytesize.should eq(4)
deleted.should eq("fbar")

deleted = "foobar".delete('x')
deleted.bytesize.should eq(6)
deleted.should eq("foobar")
"foobar".delete('o').should eq("fbar")
"foobar".delete('x').should eq("foobar")
end
end

it "reverses string" do
reversed = "foobar".reverse
reversed.bytesize.should eq(6)
reversed.should eq("raboof")
"foobar".reverse.should eq("raboof")
end

it "reverses utf-8 string" do
reversed = "こんいちは".reverse
reversed.bytesize.should eq(15)
reversed.size.should eq(5)
reversed.should eq("はちいんこ")
"こんいちは".reverse.should eq("はちいんこ")
end

it "reverses taking grapheme clusters into account" do
reversed = "noël".reverse
reversed.bytesize.should eq("noël".bytesize)
reversed.size.should eq("noël".size)
reversed.should eq("lëon")
"noël".reverse.should eq("lëon")
end

describe "sub" do
it "subs char with char" do
replaced = "foobar".sub('o', 'e')
replaced.bytesize.should eq(6)
replaced.should eq("feobar")
"foobar".sub('o', 'e').should eq("feobar")
end

it "subs char with string" do
replaced = "foobar".sub('o', "ex")
replaced.bytesize.should eq(7)
replaced.should eq("fexobar")
"foobar".sub('o', "ex").should eq("fexobar")
end

it "subs char with string" do
replaced = "foobar".sub do |char|
char.should eq 'f'
"some"
end
replaced.bytesize.should eq(9)
replaced.should eq("someoobar")
end.should eq("someoobar")

empty = ""
empty.sub { 'f' }.should be(empty)
Expand Down Expand Up @@ -1184,17 +1165,11 @@ describe "String" do
end

it "subs at index with char" do
string = "hello".sub(1, 'a')
string.should eq("hallo")
string.bytesize.should eq(5)
string.size.should eq(5)
"hello".sub(1, 'a').should eq("hallo")
end

it "subs at index with char, non-ascii" do
string = "あいうえお".sub(2, 'の')
string.should eq("あいのえお")
string.size.should eq(5)
string.bytesize.should eq("あいのえお".bytesize)
"あいうえお".sub(2, 'の').should eq("あいのえお")
end

it "subs at negative index with char" do
Expand All @@ -1205,66 +1180,41 @@ describe "String" do
end

it "subs at index with string" do
string = "hello".sub(1, "eee")
string.should eq("heeello")
string.bytesize.should eq(7)
string.size.should eq(7)
"hello".sub(1, "eee").should eq("heeello")
end

it "subs at negative index with string" do
string = "hello".sub(-1, "ooo")
string.should eq("hellooo")
string.bytesize.should eq(7)
string.size.should eq(7)
"hello".sub(-1, "ooo").should eq("hellooo")
end

it "subs at index with string, non-ascii" do
string = "あいうえお".sub(2, "けくこ")
string.should eq("あいけくこえお")
string.bytesize.should eq("あいけくこえお".bytesize)
string.size.should eq(7)
"あいうえお".sub(2, "けくこ").should eq("あいけくこえお")
end

it "subs range with char" do
string = "hello".sub(1..2, 'a')
string.should eq("halo")
string.bytesize.should eq(4)
string.size.should eq(4)
"hello".sub(1..2, 'a').should eq("halo")
end

it "subs range with char, non-ascii" do
string = "あいうえお".sub(1..2, 'け')
string.should eq("あけえお")
string.size.should eq(4)
string.bytesize.should eq("あけえお".bytesize)
"あいうえお".sub(1..2, 'け').should eq("あけえお")
end

it "subs range with string" do
string = "hello".sub(1..2, "eee")
string.should eq("heeelo")
string.size.should eq(6)
string.bytesize.should eq(6)
"hello".sub(1..2, "eee").should eq("heeelo")
end

it "subs range with string, non-ascii" do
string = "あいうえお".sub(1..2, "けくこ")
string.should eq("あけくこえお")
string.size.should eq(6)
string.bytesize.should eq("あけくこえお".bytesize)
"あいうえお".sub(1..2, "けくこ").should eq("あけくこえお")
end
end

describe "gsub" do
it "gsubs char with char" do
replaced = "foobar".gsub('o', 'e')
replaced.bytesize.should eq(6)
replaced.should eq("feebar")
"foobar".gsub('o', 'e').should eq("feebar")
end

it "gsubs char with string" do
replaced = "foobar".gsub('o', "ex")
replaced.bytesize.should eq(8)
replaced.should eq("fexexbar")
"foobar".gsub('o', "ex").should eq("fexexbar")
end

it "gsubs char with string (nop)" do
Expand All @@ -1289,7 +1239,6 @@ describe "String" do
char
end
end
replaced.bytesize.should eq(18)
replaced.should eq("somethingthingbexr")
end

Expand Down Expand Up @@ -1508,39 +1457,32 @@ describe "String" do
end

it "does *" do
str = "foo" * 10
str.bytesize.should eq(30)
str.should eq("foofoofoofoofoofoofoofoofoofoo")
("foo" * 10).should eq("foofoofoofoofoofoofoofoofoofoo")
end

describe "+" do
it "does for both ascii" do
str = "foo" + "bar"
str.bytesize.should eq(6)
str.@length.should eq(6)
str.@length.should eq(6) # Check that it was pre-computed
str.should eq("foobar")
end

it "does for both unicode" do
str = "青い" + "旅路"
str.@length.should eq(4)
str.@length.should eq(4) # Check that it was pre-computed
str.should eq("青い旅路")
end

it "does with ascii char" do
str = "foo"
str2 = str + '/'
str2.should eq("foo/")
str2.bytesize.should eq(4)
str2.size.should eq(4)
end

it "does with unicode char" do
str = "fooba"
str2 = str + 'る'
str2.should eq("foobaる")
str2.bytesize.should eq(8)
str2.size.should eq(6)
end

it "does when right is empty" do
Expand Down
63 changes: 63 additions & 0 deletions src/indexable.cr
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,69 @@ module Indexable(T)
self
end

# Optimized version of `Enumerable#join` that performs better when
# all of the elements in this indexable are strings: the total string
# bytesize to return can be computed before creating the final string,
# which performs better because there's no need to do reallocations.
def join(separator = "")
return "" if empty?

{% if T == String %}
join_strings(separator)
{% elsif String < T %}
if all?(&.is_a?(String))
join_strings(separator)
else
super(separator)
end
{% else %}
super(separator)
{% end %}
end

private def join_strings(separator)
separator = separator.to_s

# The total bytesize of the string to return is:
length =
((self.size - 1) * separator.bytesize) + # the bytesize of all separators
self.sum(&.to_s.bytesize) # the bytesize of all the elements

String.new(length) do |buffer|
# Also compute the UTF-8 size if we can
size = 0
size_known = true

each_with_index do |elem, i|
# elem is guaranteed to be a String, but the compiler doesn't know this
# if we enter via the all?(&.is_a?(String)) branch.
elem = elem.to_s

# Copy separator to buffer
if i != 0
buffer.copy_from(separator.to_unsafe, separator.bytesize)
buffer += separator.bytesize
end

# Copy element to buffer
buffer.copy_from(elem.to_unsafe, elem.bytesize)
buffer += elem.bytesize

# Check whether we'll know the final UTF-8 size
if elem.size_known?
size += elem.size
Copy link
Contributor

@RX14 RX14 Sep 1, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is wrong because the size of the seperator isn't added. The specs pass though, is there any way to test this in the specs?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, good catch! Yes, I'll check it now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed!

else
size_known = false
end
end

# Add size of all separators
size += (self.size - 1) * separator.size if size_known

{length, size_known ? size : 0}
end
end

# Returns an `Array` with all the elements in the collection.
#
# ```
Expand Down
Loading