Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not mark switchover complete if subscription drop fails #188

Merged
merged 1 commit into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions lib/pg_easy_replicate/orchestrate.rb
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,11 @@ def switchover(
revoke_connections_on_source_db(group_name)
wait_for_remaining_catchup(group_name)
refresh_sequences(conn_string: target_conn, schema: group[:schema_name])

drop_subscription(
group_name: group_name,
target_conn_string: target_conn,
)
mark_switchover_complete(group_name)

unless skip_vacuum_analyze
Expand All @@ -248,11 +253,6 @@ def switchover(
schema: group[:schema_name],
)
end

drop_subscription(
group_name: group_name,
target_conn_string: target_conn,
)
rescue => e
restore_connections_on_source_db(group_name)
abort_with("Switchover failed: #{e.message}")
Expand Down
67 changes: 62 additions & 5 deletions spec/pg_easy_replicate/orchestrate_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,7 @@
end

describe ".drop_subscription" do
before do
PgEasyReplicate.bootstrap({ group_name: "cluster1" })
end
before { PgEasyReplicate.bootstrap({ group_name: "cluster1" }) }

after do
PgEasyReplicate.cleanup({ everything: true, group_name: "cluster1" })
Expand All @@ -234,7 +232,8 @@
it "raises an error when user does not have sufficient privileges" do
# Use a limited user for this test case
restricted_user = "no_sup"
restricted_user_target_connection_url = target_connection_url(restricted_user)
restricted_user_target_connection_url =
target_connection_url(restricted_user)

described_class.create_subscription(
group_name: "cluster1",
Expand All @@ -250,7 +249,9 @@
end.to raise_error(RuntimeError) { |e|
expect(e.message).to include("Unable to drop subscription")
}
expect(pg_subscriptions(connection_url: target_connection_url)).not_to eq([])
expect(pg_subscriptions(connection_url: target_connection_url)).not_to eq(
[],
)
end
end

Expand Down Expand Up @@ -704,6 +705,62 @@
{ last_analyze: nil, last_vacuum: nil, relname: "spatial_ref_sys" },
)
end

it "does not mark switchover complete if subscription drop fails" do
conn1 =
PgEasyReplicate::Query.connect(
connection_url: connection_url,
schema: test_schema,
)
conn1[:items].insert(name: "Foo1")
expect(conn1[:items].first[:name]).to eq("Foo1")

conn2 =
PgEasyReplicate::Query.connect(
connection_url: target_connection_url,
schema: test_schema,
)
expect(conn2[:items].first).to be_nil

ENV["SECONDARY_SOURCE_DB_URL"] = docker_compose_source_connection_url
described_class.start_sync(
group_name: "cluster1",
schema_name: test_schema,
recreate_indices_post_copy: true,
)

allow(described_class).to receive(:drop_subscription).and_raise(
"Subscription drop failed",
)

expect do
described_class.switchover(
group_name: "cluster1",
source_conn_string: connection_url,
target_conn_string: target_connection_url,
skip_vacuum_analyze: true,
)
end.to raise_error("Switchover failed: Subscription drop failed")

described_class.restore_connections_on_source_db("cluster1")

expect(PgEasyReplicate::Group.find("cluster1")).to include(
switchover_completed_at: nil,
)

RSpec::Mocks.space.proxy_for(described_class).reset

described_class.switchover(
group_name: "cluster1",
source_conn_string: connection_url,
target_conn_string: target_connection_url,
skip_vacuum_analyze: true,
)
described_class.restore_connections_on_source_db("cluster1")
expect(PgEasyReplicate::Group.find("cluster1")).to include(
switchover_completed_at: kind_of(Time),
)
end
end

# Note: Hard to test for special roles that act as superuser which aren't superuser, like rds_superuser
Expand Down