Skip to content

Commit

Permalink
FIX: Falling back to primary PG server not reliable on Rails 7.1 (#71)
Browse files Browse the repository at this point in the history
This commit adds the use of [ActiveRecord::ConnectionAdatpers::PostgreSQLAdatper#verify!](https://api.rubyonrails.org/classes/ActiveRecord/ConnectionAdapters/AbstractAdapter.html#method-i-verify-21)
when a new connection is retrieved from the connection pool. This is because `ActiveRecord::ConnectionAdapters::PostgreSQLAdapter#active?`
was changed in Rails 7.1 to return `false` if the connection has not be used to do
something meaningful. Ref:
rails/rails@8551e64.
Due to this change, our fallback to primary checker will keep thinking
that the primary PG server is down. Note that before Rails 7.1, `ActiveRecord::ConnectionAdapters::PostgreSQLAdapter#active?`
will always execute a fake query to check if the query can be executed. By calling `ActiveRecord::ConnectionAdatpers::PostgreSQLAdatper#verify!`, before `ActiveRecord::ConnectionAdapters::PostgreSQLAdapter#active?`, we ensure that the connection to the Postgres 
server has been made first.

This commit also updates the ActiveRecord tests to be more reliable by
reducing the Unicorn worker processes to 1 so that we don't have to rely
on flooding the app with requests to get all the Unicorn processes to
failover/fallback.

Co-authored-by: Loïc Guitaut <loic@discourse.org>
tgxworld and Flink authored Jul 3, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent b0dd7e7 commit 92c1f7e
Showing 17 changed files with 282 additions and 81 deletions.
1 change: 0 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -84,7 +84,6 @@ jobs:
- name: Setup postgres
run: |
make setup_pg
make start_pg
- name: ActiveRecord specs
env:
1 change: 1 addition & 0 deletions .rspec
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
--color
--require spec_helper
--format documentation
--order random
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -5,7 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [2.1.1] - 2024-07-02

- FIX: Falling back to primary PG server not reliable on Rails 7.1

## [2.1.0] - 2024-05-29

- DEV: Update dependencies to officially support Rails 7.1

## [2.0.1] - 2023-05-30
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -107,7 +107,6 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
The ActiveRecord failover tests are run against a dummy Rails server. Run the following commands to run the test:

1. `make setup_pg`
2. `make start_pg`
3. `bin/rspec active_record`. You may also run the tests with more unicorn workers by adding the `UNICORN_WORKERS` env variable.

#### Redis
9 changes: 5 additions & 4 deletions lib/rails_failover/active_record/handler.rb
Original file line number Diff line number Diff line change
@@ -60,10 +60,11 @@ def initiate_fallback_to_primary

begin
connection =
::ActiveRecord::Base.connection_handler.retrieve_connection(
spec_name,
role: handler_key,
)
::ActiveRecord::Base
.connection_handler
.retrieve_connection(spec_name, role: handler_key)
.tap(&:verify!)

connection_active = connection.active?
rescue => e
logger.debug "#{Process.pid} Connection to server for '#{handler_key} #{spec_name}' failed with '#{e.message}'"
2 changes: 1 addition & 1 deletion lib/rails_failover/active_record/railtie.rb
Original file line number Diff line number Diff line change
@@ -11,7 +11,7 @@ class Railtie < ::Rails::Railtie
app.config.active_record_rails_failover = true
::ActiveSupport.on_load(:active_record) do
begin
::ActiveRecord::Base.connection
::ActiveRecord::Base.connection.verify!
rescue ::ActiveRecord::NoDatabaseError
# Do nothing since database hasn't been created
rescue ::PG::Error, ::ActiveRecord::ConnectionNotEstablished
2 changes: 1 addition & 1 deletion lib/rails_failover/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

module RailsFailover
VERSION = "2.1.0"
VERSION = "2.1.1"
end
15 changes: 2 additions & 13 deletions makefile
Original file line number Diff line number Diff line change
@@ -3,19 +3,8 @@ include redis.mk

all: redis

active_record: teardown_dummy_rails_server setup_dummy_rails_server test_active_record
active_record: test_active_record

test_active_record:
@BUNDLE_GEMFILE=./spec/support/dummy_app/Gemfile bundle install --quiet
@ACTIVE_RECORD=1 bundle exec rspec --tag type:active_record ${RSPEC_PATH}

setup_dummy_rails_server:
@cd spec/support/dummy_app && BUNDLE_GEMFILE=Gemfile bundle install --quiet && BUNDLE_GEMFILE=Gemfile RAILS_ENV=production $(BUNDLER_BIN) exec rails db:create db:migrate db:seed

start_dummy_rails_server:
@cd spec/support/dummy_app && BUNDLE_GEMFILE=Gemfile SECRET_KEY_BASE=somekey bundle exec unicorn -c config/unicorn.conf.rb -D -E production

stop_dummy_rails_server:
@kill -TERM $(shell cat spec/support/dummy_app/tmp/pids/unicorn.pid)

teardown_dummy_rails_server:
@cd spec/support/dummy_app && (! (bundle check > /dev/null 2>&1) || BUNDLE_GEMFILE=Gemfile DISABLE_DATABASE_ENVIRONMENT_CHECK=1 RAILS_ENV=production $(BUNDLER_BIN) exec rails db:drop)
3 changes: 0 additions & 3 deletions postgresql.mk
Original file line number Diff line number Diff line change
@@ -41,9 +41,6 @@ start_pg_primary:
start_pg_replica:
@$(PG_BIN_DIR)/pg_ctl --silent --log /dev/null -w -D $(PG_REPLICA_DATA_DIR) -o "-p $(PG_REPLICA_PORT)" -o "-k $(PG_REPLICA_RUN_DIR)" start

restart_pg_primary:
@$(PG_BIN_DIR)/pg_ctl --silent --log /dev/null -w -D $(PG_PRIMARY_DATA_DIR) -o "-p $(PG_PRIMARY_PORT)" -o "-k $(PG_PRIMARY_RUN_DIR)" restart

stop_pg_primary:
@$(PG_BIN_DIR)/pg_ctl --silent --log /dev/null -w -D $(PG_PRIMARY_DATA_DIR) -o "-p $(PG_PRIMARY_PORT)" -o "-k $(PG_PRIMARY_RUN_DIR)" stop

8 changes: 8 additions & 0 deletions spec/helpers/generic_helper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# frozen_string_literal: true

module GenericHelper
def wait_for(timeout:, &blk)
till = Time.now + (timeout.to_f / 1000)
sleep 0.001 while Time.now < till && !blk.call
end
end
71 changes: 71 additions & 0 deletions spec/helpers/postgres_helper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# frozen_string_literal: true

module PostgresHelper
def start_pg_primary
return if pg_primary_is_up?
system("make start_pg_primary")
wait_for_pg_primary_to_be_up
end

def stop_pg_primary
return if pg_primary_is_down?
system("make stop_pg_primary")
wait_for_pg_primary_to_be_down
end

def start_pg_replica
system("make start_pg_replica")
wait_for_pg_replica_to_be_up
end

def stop_pg_replica
system("make stop_pg_replica")
wait_for_pg_replica_to_be_down
end

private

def pg_primary_is_up?
File.exist?(pg_primary_pid_path)
end

def pg_primary_is_down?
!File.exist?(pg_primary_pid_path)
end

def wait_for_pg_primary_to_be_up
wait_for_pg_to_be_up(role: :primary)
end

def wait_for_pg_primary_to_be_down
wait_for_pg_to_be_down(role: :primary)
end

def wait_for_pg_replica_to_be_up
wait_for_pg_to_be_up(role: :replica)
end

def wait_for_pg_replica_to_be_down
wait_for_pg_to_be_down(role: :replica)
end

def wait_for_pg_to_be_up(role:)
wait_for(timeout: 5) { File.exist?(self.send("pg_#{role}_pid_path")) }
end

def wait_for_pg_to_be_down(role:)
wait_for(timeout: 5) { !File.exist?(self.send("pg_#{role}_pid_path")) }
end

def pg_replica_pid_path
"#{gem_root}/tmp/replica/data/postmaster.pid"
end

def pg_primary_pid_path
"#{gem_root}/tmp/primary/data/postmaster.pid"
end

def gem_root
File.expand_path("../..", __dir__)
end
end
84 changes: 84 additions & 0 deletions spec/helpers/rails_server_helper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# frozen_string_literal: true

module RailsServerHelper
def setup_rails_server
execute_command(
"cd spec/support/dummy_app && BUNDLE_GEMFILE=Gemfile RAILS_ENV=production bin/bundle exec rails db:create db:migrate db:seed",
)
end

def start_rails_server
if (
(unicorn_master_pid = get_unicorn_master_pid) != 0 &&
(get_unicorn_worker_pids(unicorn_master_pid).size == 1.to_i)
)
return
end

system(
"cd spec/support/dummy_app && BUNDLE_GEMFILE=Gemfile SECRET_KEY_BASE=somekey bin/bundle exec unicorn -c config/unicorn.conf.rb -D -E production",
)

count = 0
timeout = 10

while (unicorn_master_pid = get_unicorn_master_pid) == 0
raise "Timeout while waiting for unicorn master to be up" if count == timeout
count += 1
sleep 1
end

count = 0
timeout = 10

while get_unicorn_worker_pids(unicorn_master_pid).size != 1.to_i
raise "Timeout while waiting for unicorn worker to be up" if count == timeout
count += 1
sleep 1
end

true
end

def stop_rails_server
system("kill -15 #{get_unicorn_master_pid}")

count = 0
timeout = 10

while get_unicorn_master_pid != 0
raise "Timeout while waiting for unicorn master to be down" if count == timeout
count += 1
sleep 1
end

true
end

def teardown_rails_server
execute_command(
"cd spec/support/dummy_app && BUNDLE_GEMFILE=Gemfile DISABLE_DATABASE_ENVIRONMENT_CHECK=1 RAILS_ENV=production bin/bundle exec rails db:drop",
)
end

private

def execute_command(command)
output = `#{command}`
raise "Command failed: #{command}\nOutput: #{output}" unless $?.success?

puts output if ENV["VERBOSE"]

output
end

def get_unicorn_master_pid
execute_command(
"ps aux | grep \"unicorn master\" | grep -v \"grep\" | awk '{print $2}'",
).strip.to_i
end

def get_unicorn_worker_pids(unicorn_master_pid)
execute_command("pgrep -P #{unicorn_master_pid}").split("\n").map(&:to_i)
end
end
149 changes: 95 additions & 54 deletions spec/integration/active_record_spec.rb
Original file line number Diff line number Diff line change
@@ -1,88 +1,120 @@
# frozen_string_literal: true

require "spec_helper"
require "fileutils"

RSpec.describe "ActiveRecord failover", type: :active_record do
EXPECTED_POSTS_COUNT = "100"

def start_dummy_rails_server
raise "Could not start dummy server" if !system("make start_dummy_rails_server")
def restart_dummy_rails_server
stop_rails_server
start_rails_server
end

def stop_dummy_rails_server
system("make stop_dummy_rails_server")
# rubocop:disable RSpec/BeforeAfterAll
before(:all) do
start_pg_primary
start_pg_replica
setup_rails_server
start_rails_server
end

def restart_dummy_rails_server
stop_dummy_rails_server
start_dummy_rails_server
after do
start_pg_primary
start_rails_server
end

# rubocop:disable RSpec/BeforeAfterAll
before(:all) { start_dummy_rails_server }
after(:all) do
stop_rails_server
teardown_rails_server
stop_pg_replica
stop_pg_primary
end

it "should failover to reading connection handler when PG primary is down and fallback to writing connection handler when PG primary is back up" do
response = get("/posts")

expect(response.code.to_i).to eq(200)

expect(response.body).to eq(<<~BODY.chomp)
Posts count: #{EXPECTED_POSTS_COUNT}
role: writing
BODY

after(:all) { stop_dummy_rails_server }
stop_pg_primary

get("/posts") # Trigger process to failover

it "should failover to reading connection handler when PG primary " \
"is down and fallback to writing connection handler when PG primary is back up" do
response = get("/posts")

expect(response.code.to_i).to eq(200)
expect(response.body).to include("writing")
expect(response.body).to include(EXPECTED_POSTS_COUNT)

system("make stop_pg_primary")
expect(response.body).to eq(<<~BODY.chomp)
Posts count: #{EXPECTED_POSTS_COUNT}
role: reading
BODY

flood_get("/posts", times: 10) # Trigger all processes to failover
start_pg_primary # Start the fallback process

flood_get("/posts", times: 100) do |res|
expect(res.code.to_i).to eq(200)
expect(res.body).to include("reading")
expect(res.body).to include(EXPECTED_POSTS_COUNT)
end
ensure
system("make restart_pg_primary")
sleep 0.05 # Wait for fallback to complete

response = get("/posts")

expect(response.body).to eq(<<~BODY.chomp)
Posts count: #{EXPECTED_POSTS_COUNT}
role: writing
BODY
end

it "should be able to start with the PG primary being down" do
stop_dummy_rails_server
system("make stop_pg_primary")
start_dummy_rails_server
stop_rails_server
stop_pg_primary
start_rails_server

flood_get("/posts", times: 100) do |response|
expect(response.code.to_i).to eq(200)
expect(response.body).to include("reading")
end
response = get("/posts")

expect(response.code.to_i).to eq(200)

system("make start_pg_primary")
expect(response.body).to eq(<<~BODY.chomp)
Posts count: #{EXPECTED_POSTS_COUNT}
role: reading
BODY

sleep 0.05
start_pg_primary

flood_get("/posts", times: 100) do |response|
expect(response.code.to_i).to eq(200)
expect(response.body).to include("writing")
end
ensure
system("make restart_pg_primary")
sleep 0.05 # Wait for fallback to complete

response = get("/posts")

expect(response.code.to_i).to eq(200)

expect(response.body).to eq(<<~BODY.chomp)
Posts count: #{EXPECTED_POSTS_COUNT}
role: writing
BODY
end

it "supports multiple databases automatically" do
response = get("/posts?role=two_writing")

expect(response.code.to_i).to eq(200)
expect(response.body).to include("two_writing")

system("make stop_pg_primary")
expect(response.body).to eq(<<~BODY.chomp)
Posts count: #{EXPECTED_POSTS_COUNT}
role: two_writing
BODY

flood_get("/posts?role=two_writing", times: 10) # Trigger all processes to failover
stop_pg_primary

flood_get("/posts?role=two_writing", times: 100) do |resp|
expect(resp.code.to_i).to eq(200)
expect(resp.body).to include("two_reading")
end
ensure
system("make start_pg_primary")
get("/posts?role=two_writing") # Trigger process to failover

response = get("/posts?role=two_writing")

expect(response.code.to_i).to eq(200)

expect(response.body).to eq(<<~BODY.chomp)
Posts count: #{EXPECTED_POSTS_COUNT}
role: two_reading
BODY
end

it "should not failover on PG server errors" do
@@ -93,7 +125,11 @@ def restart_dummy_rails_server
response = get("/posts")

expect(response.code.to_i).to eq(200)
expect(response.body).to include("writing")

expect(response.body).to eq(<<~BODY.chomp)
Posts count: #{EXPECTED_POSTS_COUNT}
role: writing
BODY
end

context "when PG exception is raised before ActionDispatch::DebugExceptions" do
@@ -104,12 +140,14 @@ def restart_dummy_rails_server
after { FileUtils.rm_f(path) }

it "fails over" do
flood_get("/trigger-middleware-pg-exception", times: 10) do |response|
expect(response.code.to_i).to eq(500)
end
response = get("/trigger-middleware-pg-exception")

expect(response.code.to_i).to eq(500)

sleep 0.05

sleep 0.5
response = get("/posts")

expect(response.code.to_i).to eq(200)
expect(path.exist?).to be true
end
@@ -123,7 +161,10 @@ def restart_dummy_rails_server

before { FileUtils.cp(no_replicas_config, db_config) }

after { FileUtils.cp(replicas_config, db_config) }
after do
FileUtils.cp(replicas_config, db_config)
restart_dummy_rails_server
end

it "does not prevent Rails from loading" do
expect { restart_dummy_rails_server }.not_to raise_error
7 changes: 7 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@

require "bundler/setup"
require "byebug"
require "helpers/generic_helper"

RSpec.configure do |config|
# Enable flags like --only-failures and --next-failure
@@ -14,6 +15,8 @@
c.syntax = :expect
end

config.include GenericHelper

if ENV["REDIS"]
require "rails_failover/redis"
require "helpers/redis_helper"
@@ -22,6 +25,10 @@

if ENV["ACTIVE_RECORD"]
require "helpers/url_helper"
require "helpers/postgres_helper"
require "helpers/rails_server_helper"
config.include UrlHelper
config.include PostgresHelper
config.include RailsServerHelper
end
end
1 change: 1 addition & 0 deletions spec/support/dummy_app/app/controllers/posts_controller.rb
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@ class PostsController < ApplicationController
def index
@posts_count = Post.count
@role = request.env["rails_failover.role"]
render plain: "Posts count: #{@posts_count}\nrole: #{@role}"
end

def trigger_pg_server_error
2 changes: 0 additions & 2 deletions spec/support/dummy_app/app/views/posts/index.html.erb

This file was deleted.

2 changes: 1 addition & 1 deletion spec/support/dummy_app/config/unicorn.conf.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
worker_processes (ENV["UNICORN_WORKERS"] || 5).to_i
worker_processes (ENV["UNICORN_WORKERS"] || 1).to_i

path = File.expand_path(File.expand_path(File.dirname(__FILE__)) + "/../")

0 comments on commit 92c1f7e

Please sign in to comment.