Merge pull request #1028 from danmayer/add_benchmark_and_profile
add a benchmark and profile script and hook into CI
Showing 4 changed files with 518 additions and 0 deletions.
@@ -0,0 +1,26 @@
name: Benchmarks

on: [push, pull_request]

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Install Memcached 1.6.23
        working-directory: scripts
        env:
          MEMCACHED_VERSION: 1.6.23
        run: |
          chmod +x ./install_memcached.sh
          ./install_memcached.sh
          memcached -d
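          # the benchmark script's multi-server Dalli client also expects a second memcached instance on port 11222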
          memcached -d -p 11222
      - name: Set up Ruby
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: 3.2
          bundler-cache: true # 'bundle install' and cache
      - name: Run Benchmarks
        run: RUBY_YJIT_ENABLE=1 BENCH_TARGET=all bundle exec bin/benchmark
@@ -0,0 +1,38 @@
name: Profiles

on: [push, pull_request]

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Install Memcached 1.6.23
        working-directory: scripts
        env:
          MEMCACHED_VERSION: 1.6.23
        run: |
          chmod +x ./install_memcached.sh
          ./install_memcached.sh
          memcached -d
      - name: Set up Ruby
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: 3.4
          bundler-cache: true # 'bundle install' and cache
      - name: Run Profiles
        run: RUBY_YJIT_ENABLE=1 BENCH_TARGET=all bundle exec bin/profile
      - name: Upload profile results
        uses: actions/upload-artifact@v4
        with:
          name: profile-results
          path: |
            client_get_profile.json
            socket_get_profile.json
            client_set_profile.json
            socket_set_profile.json
            client_get_multi_profile.json
            socket_get_multi_profile.json
            client_set_multi_profile.json
            socket_set_multi_profile.json
@@ -0,0 +1,255 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# This helps benchmark the current performance of Dalli
# as well as compare the performance of optimized and non-optimized calls like multi-set vs set
#
# run with:
# bundle exec bin/benchmark
# RUBY_YJIT_ENABLE=1 BENCH_TARGET=get bundle exec bin/benchmark
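# BENCH_TARGET picks the suites to run (set, get, get_multi, set_multi or all); BENCH_CACHE_URL,
# BENCH_TIME, BENCH_WARMUP and BENCH_PAYLOAD_SIZE are also read from the environment below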
require 'bundler/inline'
require 'json'

gemfile do
  source 'https://rubygems.org'
  gem 'benchmark-ips'
  gem 'logger'
end

require_relative '../lib/dalli'
require 'benchmark/ips'
require 'monitor'

##
# StringSerializer is a serializer that avoids the overhead of Marshal or JSON.
##
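# Dalli calls the serializer's dump on write and load on read, so passing strings through untouched
# keeps serialization cost out of the measurements.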
class StringSerializer
  def self.dump(value)
    value
  end

  def self.load(value)
    value
  end
end

dalli_url = ENV['BENCH_CACHE_URL'] || '127.0.0.1:11211'
bench_target = ENV['BENCH_TARGET'] || 'set'
bench_time = (ENV['BENCH_TIME'] || 10).to_i
bench_warmup = (ENV['BENCH_WARMUP'] || 3).to_i
bench_payload_size = (ENV['BENCH_PAYLOAD_SIZE'] || 700_000).to_i
payload = 'B' * bench_payload_size
TERMINATOR = "\r\n"
puts "yjit: #{RubyVM::YJIT.enabled?}"

client = Dalli::Client.new(dalli_url, serializer: StringSerializer, compress: false, raw: true)
multi_client = Dalli::Client.new('localhost:11211,localhost:11222', serializer: StringSerializer, compress: false,
                                 raw: true)

# The raw socket implementation is used to benchmark the performance of Dalli & the overhead of the various
# abstractions in the library.
sock = TCPSocket.new('127.0.0.1', '11211', connect_timeout: 1)
sock.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_NODELAY, true)
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_KEEPALIVE, true)
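# TCP_NODELAY disables Nagle's algorithm so small command writes are sent immediately instead of being batched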
# Benchmarks didn't see any performance gains from increasing the SO_RCVBUF buffer size
# sock.setsockopt(Socket::SOL_SOCKET, ::Socket::SO_RCVBUF, 1024 * 1024 * 8)
# Benchmarks did see an improvement in performance when increasing the SO_SNDBUF buffer size
# sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDBUF, 1024 * 1024 * 8)

# ensure the clients are all connected and working
client.set('key', payload)
multi_client.set('multi_key', payload)
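# classic text protocol set: "set <key> <flags> <exptime> <bytes>\r\n" followed by the data block and a
# trailing "\r\n"; the server replies with a single status line (normally "STORED"), read back below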
sock.write("set sock_key 0 3600 #{payload.bytesize}\r\n") | ||
sock.write(payload) | ||
sock.write(TERMINATOR) | ||
sock.flush | ||
sock.readline # clear the buffer | ||
|
||
raise 'dalli client mismatch' if payload != client.get('key') | ||
|
||
raise 'multi dalli client mismatch' if payload != multi_client.get('multi_key') | ||
|
||
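# meta get ("mg") with the v flag returns a "VA <size>" line followed by the value and terminator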
sock.write("mg sock_key v\r\n") | ||
sock.readline | ||
sock_value = sock.read(payload.bytesize) | ||
sock.read(TERMINATOR.bytesize) | ||
raise 'sock mismatch' if payload != sock_value | ||
|
||
# ensure we have basic data for the benchmarks and get calls | ||
payload_smaller = 'B' * (bench_payload_size / 10) | ||
pairs = {} | ||
100.times do |i| | ||
pairs["multi_#{i}"] = payload_smaller | ||
end | ||
client.quiet do | ||
pairs.each do |key, value| | ||
client.set(key, value, 3600, raw: true) | ||
end | ||
end | ||
|
||
### | ||
# GC Suite | ||
# benchmark without GC skewing things | ||
### | ||
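# benchmark-ips calls warming/running on the configured suite before each warmup and measurement phase,
# so a full GC happens between reports and GC stays disabled while samples are being taken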
class GCSuite
  def warming(*)
    run_gc
  end

  def running(*)
    run_gc
  end

  def warmup_stats(*); end

  def add_report(*); end

  private

  def run_gc
    GC.enable
    GC.start
    GC.disable
  end
end
suite = GCSuite.new

# rubocop:disable Metrics/MethodLength
# rubocop:disable Metrics/PerceivedComplexity
# rubocop:disable Metrics/AbcSize
# rubocop:disable Metrics/CyclomaticComplexity
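# Pipelines one meta get per key: every request except the last carries the q (quiet) flag so misses
# produce no response, and the final non-quiet request guarantees a terminating reply. The v flag asks
# for the value, f for the stored client flags, and k echoes the key so replies can be matched to keys.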
def sock_get_multi(sock, pairs)
  count = pairs.length
  pairs.each_key do |key|
    count -= 1
    tail = count.zero? ? '' : 'q'
    sock.write("mg #{key} v f k #{tail}\r\n")
  end
  sock.flush
  # read all the memcached responses back and build a hash of key/value pairs
  results = {}
  last_result = false
  while (line = sock.readline.chomp!(TERMINATOR)) != ''
    last_result = true if line.start_with?('EN ')
    next unless line.start_with?('VA ') || last_result

    _, value_length, _flags, key = line.split
    results[key[1..]] = sock.read(value_length.to_i)
    sock.read(TERMINATOR.length)
    break if results.size == pairs.size
    break if last_result
  end
  results
end
# rubocop:enable Metrics/MethodLength
# rubocop:enable Metrics/PerceivedComplexity
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/CyclomaticComplexity

if %w[all set].include?(bench_target)
  Benchmark.ips do |x|
    x.config(warmup: bench_warmup, time: bench_time, suite: suite)
    x.report('client set') { client.set('key', payload) }
    # x.report('multi client set') { multi_client.set('string_key', payload) }
    x.report('raw sock set') do
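      # meta set ("ms <key> <datalen> ..."): T3600 gives the entry a one hour TTL and MS selects plain set mode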
sock.write("ms sock_key #{payload.bytesize} T3600 MS\r\n") | ||
sock.write(payload) | ||
sock.write("\r\n") | ||
sock.flush | ||
sock.readline # clear the buffer | ||
end | ||
x.compare! | ||
end | ||
end | ||
|
||
@lock = Monitor.new | ||
if %w[all get].include?(bench_target) | ||
Benchmark.ips do |x| | ||
x.config(warmup: bench_warmup, time: bench_time, suite: suite) | ||
x.report('get dalli') do | ||
result = client.get('key') | ||
raise 'mismatch' unless result == payload | ||
end | ||
# NOTE: while this is the fastest it is not thread safe and is blocking vs IO sharing friendly | ||
x.report('get sock') do | ||
sock.write("mg sock_key v\r\n") | ||
sock.readline | ||
result = sock.read(payload.bytesize) | ||
sock.read(TERMINATOR.bytesize) | ||
raise 'mismatch' unless result == payload | ||
end | ||
# NOTE: This shows that when adding thread safety & non-blocking IO we are slower for single process/thread use case | ||
x.report('get sock non-blocking') do | ||
@lock.synchronize do | ||
sock.write("mg sock_key v\r\n") | ||
sock.readline | ||
count = payload.bytesize | ||
value = String.new(capacity: count + 1) | ||
loop do | ||
begin | ||
value << sock.read_nonblock(count - value.bytesize) | ||
rescue Errno::EAGAIN | ||
sock.wait_readable | ||
retry | ||
rescue EOFError | ||
puts 'EOFError' | ||
break | ||
end | ||
break if value.bytesize == count | ||
end | ||
sock.read(TERMINATOR.bytesize) | ||
raise 'mismatch' unless value == payload | ||
end | ||
end | ||
x.compare! | ||
end | ||
end | ||
|
||
if %w[all get_multi].include?(bench_target)
  Benchmark.ips do |x|
    x.config(warmup: bench_warmup, time: bench_time, suite: suite)
    x.report('get 100 keys') do
      result = client.get_multi(pairs.keys)
      raise 'mismatch' unless result == pairs
    end
    x.report('get 100 keys raw sock') do
      result = sock_get_multi(sock, pairs)
      raise 'mismatch' unless result == pairs
    end
    x.compare!
  end
end

if %w[all set_multi].include?(bench_target)
  Benchmark.ips do |x|
    x.config(warmup: bench_warmup, time: bench_time, suite: suite)
    x.report('write 100 keys simple') do
      client.quiet do
        pairs.each do |key, value|
          client.set(key, value, 3600, raw: true)
        end
      end
    end
    # TODO: uncomment this once the PR adding set_multi lands
    # x.report('multi client set_multi 100') do
    #   multi_client.set_multi(pairs, 3600, raw: true)
    # end
    x.report('write 100 keys raw sock') do
      count = pairs.length
      tail = ''
      value_bytesize = payload_smaller.bytesize
      ttl = 3600

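      # meta set per key: c returns the CAS value, F0 stores client flags of 0, T sets the TTL, MS selects
      # set mode, and q on all but the last key suppresses the per-key success responses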
      pairs.each do |key, value|
        count -= 1
        tail = count.zero? ? '' : 'q'
        sock.write(String.new("ms #{key} #{value_bytesize} c F0 T#{ttl} MS #{tail}\r\n",
                              capacity: key.size + value_bytesize + 40) << value << TERMINATOR)
      end
      sock.flush
      sock.gets(TERMINATOR) # clear the buffer
    end
    # x.report('write_multi 100 keys') { client.set_multi(pairs, 3600, raw: true) }
    x.compare!
  end
end