Skip to content

Commit 4916640

Browse files
committed
Improve pprof encoding performance by replacing MessageSet with Ruby hash
One of the things I've learned since I started doing some performance work in the profiler, is how expensive is to hash arrays of items, as `MessageSet` (`ObjectSet`) does. While looking for improvements to pprof encoding, I realized that the locations `MessageSet` was implementing a hashmap of `Profiling::BacktraceLocation`s to pprof `Location` objects in a rather roundabout way -- it used the full contents of the `BacktraceLocation` objects as a key, rather than the object itself. By replacing this arrangement with a regular Ruby hash that used the `BacktraceLocation` as a key, I was able to quite improve the performance of pprof encoding. In the <benchmarks/profiler_submission.rb> benchmark (Ruby 2.7.3/macOS 10.15.7/i7-1068NG7) I'm getting a > 2x speedup when compared with current master (or 3.4x speedup over dd-trace-rb 0.49.0): ``` exporter locations-hashmap 9.241 (± 0.0%) i/s - 93.000 in 10.081019s Comparison: exporter locations-hashmap: 9.2 i/s exporter cached-build-location-v2 (current master): 4.4 i/s - 2.12x (± 0.00) slower exporter baseline (before any work): 2.7 i/s - 3.42x (± 0.00) slower ```
1 parent 09ed439 commit 4916640

File tree

6 files changed

+65
-110
lines changed

6 files changed

+65
-110
lines changed

benchmarks/profiler_submission.rb

+5-2
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,16 @@ def run_benchmark
6060
run_once
6161
end
6262

63-
x.save! 'profiler-submission-results.ipsbench'
63+
x.save! 'profiler-submission-results.json'
6464
x.compare!
6565
end
6666
end
6767

6868
def run_forever
69-
run_once while true
69+
while true
70+
run_once
71+
print '.'
72+
end
7073
end
7174

7275
def run_once

lib/ddtrace/profiling/pprof/builder.rb

+19-26
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,25 @@ class Builder
2121

2222
def initialize
2323
@functions = MessageSet.new(1)
24-
@locations = MessageSet.new(1)
24+
@locations = initialize_locations_hash
2525
@mappings = MessageSet.new(1)
2626
@sample_types = MessageSet.new
2727
@samples = []
2828
@string_table = StringTable.new
29-
# Cache these procs, since it's pretty expensive to keep recreating them
30-
@build_location = method(:build_location).to_proc
29+
30+
# Cache this proc, since it's pretty expensive to keep recreating it
3131
@build_function = method(:build_function).to_proc
3232
end
3333

34+
# The locations hash maps unique BacktraceLocation instances to their corresponding pprof Location objects;
35+
# there's a 1:1 correspondence, since BacktraceLocations were already deduped
36+
def initialize_locations_hash
37+
sequence = Utils::Sequence.new(1)
38+
Hash.new do |locations_hash, backtrace_location|
39+
locations_hash[backtrace_location] = build_location(sequence.next, backtrace_location)
40+
end
41+
end
42+
3443
def encode_profile(profile)
3544
Perftools::Profiles::Profile.encode(profile).force_encoding(DEFAULT_ENCODING)
3645
end
@@ -40,7 +49,7 @@ def build_profile
4049
sample_type: @sample_types.messages,
4150
sample: @samples,
4251
mapping: @mappings.messages,
43-
location: @locations.messages,
52+
location: @locations.values,
4453
function: @functions.messages,
4554
string_table: @string_table.strings
4655
)
@@ -54,45 +63,29 @@ def build_value_type(type, unit)
5463
end
5564

5665
def build_locations(backtrace_locations, length)
57-
locations = backtrace_locations.collect do |backtrace_location|
58-
@locations.fetch(
59-
# Filename
60-
backtrace_location.path,
61-
# Line number
62-
backtrace_location.lineno,
63-
# Function name
64-
backtrace_location.base_label,
65-
# Build function
66-
&@build_location
67-
)
68-
end
66+
locations = backtrace_locations.collect { |backtrace_location| @locations[backtrace_location] }
6967

7068
omitted = length - backtrace_locations.length
7169

7270
# Add placeholder stack frame if frames were truncated
7371
if omitted > 0
7472
desc = omitted == 1 ? DESC_FRAME_OMITTED : DESC_FRAMES_OMITTED
75-
locations << @locations.fetch(
76-
''.freeze,
77-
0,
78-
"#{omitted} #{desc}",
79-
&@build_location
80-
)
73+
locations << @locations[Profiling::BacktraceLocation.new(''.freeze, 0, "#{omitted} #{desc}")]
8174
end
8275

8376
locations
8477
end
8578

86-
def build_location(id, filename, line_number, function_name = nil)
79+
def build_location(id, backtrace_location)
8780
Perftools::Profiles::Location.new(
8881
id: id,
8982
line: [build_line(
9083
@functions.fetch(
91-
filename,
92-
function_name,
84+
backtrace_location.path,
85+
backtrace_location.base_label,
9386
&@build_function
9487
).id,
95-
line_number
88+
backtrace_location.lineno
9689
)]
9790
)
9891
end

spec/ddtrace/profiling/integration_spec.rb

+1-8
Original file line numberDiff line numberDiff line change
@@ -168,14 +168,7 @@ def string_id_for(string)
168168

169169
include_context 'StackSample events' do
170170
def stack_frame_to_location_id(backtrace_location)
171-
template.builder.locations.fetch(
172-
# Filename
173-
backtrace_location.path,
174-
# Line number
175-
backtrace_location.lineno,
176-
# Function name
177-
backtrace_location.base_label
178-
) { raise 'Unknown stack frame!' }.id
171+
template.builder.locations.fetch(backtrace_location) { raise 'Unknown stack frame!' }.id
179172
end
180173

181174
def stack_frame_to_function_id(backtrace_location)

spec/ddtrace/profiling/pprof/builder_spec.rb

+36-72
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,6 @@ def string_id_for(string)
2121
builder.string_table.fetch(string)
2222
end
2323

24-
describe '#initialize' do
25-
it do
26-
is_expected.to have_attributes(
27-
functions: kind_of(Datadog::Profiling::Pprof::MessageSet),
28-
locations: kind_of(Datadog::Profiling::Pprof::MessageSet),
29-
mappings: kind_of(Datadog::Profiling::Pprof::MessageSet),
30-
sample_types: kind_of(Datadog::Profiling::Pprof::MessageSet),
31-
samples: [],
32-
string_table: kind_of(Datadog::Profiling::Pprof::StringTable)
33-
)
34-
end
35-
end
36-
3724
describe '#encode_profile' do
3825
subject(:build_profile) { builder.encode_profile(profile) }
3926

@@ -66,7 +53,7 @@ def string_id_for(string)
6653
sample_type: builder.sample_types.messages,
6754
sample: builder.samples,
6855
mapping: builder.mappings.messages,
69-
location: builder.locations.messages,
56+
location: builder.locations.values,
7057
function: builder.functions.messages,
7158
string_table: builder.string_table.strings
7259
)
@@ -98,27 +85,13 @@ def string_id_for(string)
9885
let(:backtrace_locations) { Thread.current.backtrace_locations.first(3) }
9986
let(:length) { backtrace_locations.length }
10087

101-
let(:expected_locations) do
102-
backtrace_locations.each_with_object({}) do |loc, map|
103-
key = [loc.path, loc.lineno, loc.base_label]
104-
# Use double instead of instance_double because protobuf doesn't define verifiable methods
105-
map[key] = double('Perftools::Profiles::Location')
106-
end
107-
end
108-
109-
before do
110-
expect(builder.locations).to receive(:fetch).at_least(backtrace_locations.length).times do |*args, &block|
111-
expect(expected_locations).to include(args)
112-
expect(block.source_location).to eq(builder.method(:build_location).source_location)
113-
expected_locations[args]
114-
end
115-
end
116-
11788
context 'given backtrace locations matching length' do
118-
it do
119-
is_expected.to be_a_kind_of(Array)
120-
is_expected.to have(backtrace_locations.length).items
121-
is_expected.to include(*expected_locations.values)
89+
it { is_expected.to be_a_kind_of(Array) }
90+
it { is_expected.to have(backtrace_locations.length).items }
91+
92+
it 'converts the BacktraceLocations to matching Perftools::Profiles::Location objects' do
93+
# Lines are the simplest to compare, since they aren't converted to ids
94+
expect(build_locations.map { |location| location.to_h[:line].first[:line] }).to eq backtrace_locations.map(&:lineno)
12295
end
12396
end
12497

@@ -128,56 +101,47 @@ def string_id_for(string)
128101
let(:omitted_location) { double('Perftools::Profiles::Location') }
129102

130103
before do
131-
expected_locations[['', 0, "#{omitted} #{described_class::DESC_FRAMES_OMITTED}"]] = omitted_location
104+
omitted_backtrace_location =
105+
Datadog::Profiling::BacktraceLocation.new('', 0, "#{omitted} #{described_class::DESC_FRAMES_OMITTED}")
106+
107+
builder.locations[omitted_backtrace_location] = omitted_location
132108
end
133109

134-
it do
135-
is_expected.to be_a_kind_of(Array)
136-
is_expected.to have(backtrace_locations.length + 1).items
137-
is_expected.to include(*expected_locations.values)
138-
expect(build_locations.last).to be(omitted_location)
110+
it { is_expected.to have(backtrace_locations.length + 1).items }
111+
112+
it 'converts the BacktraceLocations to matching Perftools::Profiles::Location objects' do
113+
expect(build_locations[0..-2].map { |location| location.to_h[:line].first[:line] })
114+
.to eq backtrace_locations.map(&:lineno)
115+
end
116+
117+
it 'adds a placeholder frame as the last element to indicate the omitted frames' do
118+
expect(build_locations.last).to be omitted_location
139119
end
140120
end
141121
end
142122

143123
describe '#build_location' do
144-
subject(:build_location) { builder.build_location(id, filename, line_number) }
124+
subject(:build_location) do
125+
builder.build_location(location_id, Datadog::Profiling::BacktraceLocation.new(function_name, line_number, filename))
126+
end
145127

146-
let(:id) { id_sequence.next }
147-
let(:filename) { double('filename') }
128+
let(:location_id) { rand_int }
148129
let(:line_number) { rand_int }
130+
let(:function_name) { 'the_function_name' }
131+
let(:filename) { 'the_file_name.rb' }
149132

150-
# Use double instead of instance_double because protobuf doesn't define verifiable methods
151-
let(:function) { double('Perftools::Profiles::Function', id: id_sequence.next) }
152-
153-
before do
154-
expect(builder.functions).to receive(:fetch) do |*args, &block|
155-
expect(args).to eq([filename, nil])
156-
expect(block.source_location).to eq(builder.method(:build_function).source_location)
157-
function
158-
end
159-
end
133+
it 'creates a new Perftools::Profiles::Location object with the contents of the BacktraceLocation' do
134+
function = double('Function', id: rand_int)
160135

161-
context 'given no function name' do
162-
it do
163-
is_expected.to be_a_kind_of(Perftools::Profiles::Location)
164-
is_expected.to have_attributes(
165-
id: id,
166-
line: array_including(kind_of(Perftools::Profiles::Line))
167-
)
168-
expect(build_location.line).to have(1).items
169-
end
170-
171-
describe 'returns a Location with Line that' do
172-
subject(:line) { build_location.line.first }
136+
expect(Perftools::Profiles::Function)
137+
.to receive(:new).with(hash_including(filename: string_id_for(filename), name: string_id_for(function_name)))
138+
.and_return(function)
173139

174-
it do
175-
is_expected.to have_attributes(
176-
function_id: function.id,
177-
line: line_number
178-
)
179-
end
180-
end
140+
expect(build_location).to be_a_kind_of(Perftools::Profiles::Location)
141+
expect(build_location.to_h).to match(hash_including(id: location_id,
142+
line: [{
143+
function_id: function.id, line: line_number
144+
}]))
181145
end
182146
end
183147

spec/ddtrace/profiling/pprof/stack_sample_spec.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ def string_id_for(string)
314314

315315
it 'each map to a Location on the profile' do
316316
locations.each do |id|
317-
expect(builder.locations.messages[id - 1])
317+
expect(builder.locations.values[id - 1])
318318
.to be_kind_of(Perftools::Profiles::Location)
319319
end
320320
end

spec/ddtrace/profiling/spec_helper.rb

+3-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,9 @@ def build_stack_sample(
7575

7676
Datadog::Profiling::Events::StackSample.new(
7777
nil,
78-
locations,
78+
locations.map do |location|
79+
Datadog::Profiling::BacktraceLocation.new(location.base_label, location.lineno, location.path)
80+
end,
7981
locations.length,
8082
thread_id || rand(1e9),
8183
trace_id || rand(1e9),

0 commit comments

Comments
 (0)