Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(cdc): Prepare the self hosted environment for the Change Data Capture pipeline #938

Merged
merged 16 commits into from
May 25, 2021
1 change: 1 addition & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ SENTRY_IMAGE=getsentry/sentry:nightly
SNUBA_IMAGE=getsentry/snuba:nightly
RELAY_IMAGE=getsentry/relay:nightly
SYMBOLICATOR_IMAGE=getsentry/symbolicator:nightly
WAL2JSON_VERSION=latest
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,6 @@ symbolicator/config.yml
geoip/GeoIP.conf
geoip/*.mmdb
geoip/.geoipupdate.lock

# wal2json download
postgres/wal2json
6 changes: 6 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,16 @@ services:
postgres:
<<: *restart_policy
image: "postgres:9.6"
# Logical WAL level plus a single replication slot / WAL sender, as needed by
# the Change Data Capture pipeline. These words are handed to the custom
# entrypoint below as its arguments.
command: ["postgres", "-c", "wal_level=logical", "-c", "max_replication_slots=1", "-c", "max_wal_senders=1"]
environment:
POSTGRES_HOST_AUTH_METHOD: "trust"
# Wrapper script shipped in ./postgres/ (bind-mounted below); it prepares
# pg_hba and wal2json, then execs the image's /docker-entrypoint.sh.
entrypoint: /opt/sentry/postgres-entrypoint.sh
volumes:
- "sentry-postgres:/var/lib/postgresql/data"
# Read-only bind mount exposing ./postgres/ inside the container as /opt/sentry/.
- type: bind
read_only: true
source: ./postgres/
target: /opt/sentry/
zookeeper:
<<: *restart_policy
image: "confluentinc/cp-zookeeper:5.5.0"
Expand Down
1 change: 1 addition & 0 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ source update-docker-images.sh
source build-docker-images.sh
source turn-things-off.sh
source set-up-zookeeper.sh
# Downloads the wal2json plugin into ../postgres/wal2json (path is relative to
# the working directory the install steps run in).
source install-wal2json.sh
source bootstrap-snuba.sh
source create-kafka-topics.sh
source upgrade-postgres.sh
Expand Down
34 changes: 34 additions & 0 deletions install/install-wal2json.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
echo "${_group}Downloading and installing wal2json ..."

# Downloads the wal2json shared object for this machine's architecture and
# publishes it as ../postgres/wal2json/wal2json.so.
#
# Inputs:
#   WAL2JSON_VERSION  release tag to install, or "latest" to resolve the newest
#                     getsentry/wal2json release through the GitHub API.
#   _group/_endgroup  log-group markers provided by the sourcing script.
#
# Downloads are cached per version in ../postgres/wal2json/<version>/ so a
# re-run does not hit the network again.

FILE_TO_USE="../postgres/wal2json/wal2json.so"
ARCH=$(uname -m)
FILE_NAME="wal2json-Linux-$ARCH.so"

# Kept as a plain string and expanded unquoted on purpose: word splitting is
# what turns it into the `docker run ...` command line.
DOCKER_CURL="docker run --rm curlimages/curl"

if [[ ${WAL2JSON_VERSION:-} == "latest" ]]; then
  # Take the value of the "tag_name" field from the release JSON.
  VERSION=$(
    $DOCKER_CURL https://api.github.com/repos/getsentry/wal2json/releases/latest |
      grep '"tag_name":' |
      sed -E 's/.*"([^"]+)".*/\1/'
  )

  if [[ ! $VERSION ]]; then
    echo "Cannot find wal2json latest version"
    exit 1
  fi
else
  VERSION=${WAL2JSON_VERSION:-}

  # An empty version would produce a bogus download URL and cache path.
  if [[ ! $VERSION ]]; then
    echo "WAL2JSON_VERSION is not set"
    exit 1
  fi
fi

VERSION_DIR="../postgres/wal2json/$VERSION"
mkdir -p "$VERSION_DIR"

# -s (not -f): an empty file left behind by an interrupted run is not a
# usable cache entry and must be fetched again.
if [[ ! -s "$VERSION_DIR/$FILE_NAME" ]]; then
  # Download to a temporary name and only promote it on success, so a failed
  # transfer (or an HTTP error page) is never mistaken for the plugin.
  if ! $DOCKER_CURL -f -L \
    "https://github.com/getsentry/wal2json/releases/download/$VERSION/$FILE_NAME" \
    > "$VERSION_DIR/$FILE_NAME.tmp"; then
    rm -f "$VERSION_DIR/$FILE_NAME.tmp"
    echo "Cannot download wal2json $VERSION ($FILE_NAME)"
    exit 1
  fi
  mv "$VERSION_DIR/$FILE_NAME.tmp" "$VERSION_DIR/$FILE_NAME"
fi

# Always refresh the active copy: the requested version may already be cached
# while wal2json.so still belongs to a different version (or is missing).
cp "$VERSION_DIR/$FILE_NAME" "$FILE_TO_USE"

echo "${_endgroup}"
7 changes: 7 additions & 0 deletions postgres/init_hba.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# Appends a rule to $PGDATA/pg_hba.conf that lets any user from any address
# open replication connections without authentication ("trust").

set -e

echo "host replication all all trust" >> "$PGDATA/pg_hba.conf"
46 changes: 46 additions & 0 deletions postgres/postgres-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
# This script replaces the default docker entrypoint for postgres in the
# development environment.
# Its job is to ensure postgres is properly configured to support the
# Change Data Capture pipeline (by setting access permissions and installing
# the replication plugin we use for CDC). Unfortunately the default
# Postgres image does not allow this level of configurability so we need
# to do it this way in order not to have to publish and maintain our own
# Postgres image.
#
# This then, at the end, transfers control to the default entrypoint.

# Stop at the first failing command instead of starting postgres with a
# partially applied CDC configuration.
set -e

prep_init_db() {
  # Stage the pg_hba init script in /docker-entrypoint-initdb.d so that it is
  # available as a DB-initialization hook.
  local hook_name="init_hba.sh"
  cp "/opt/sentry/${hook_name}" "/docker-entrypoint-initdb.d/${hook_name}"
}

cdc_setup_hba_conf() {
  # Ensure pg_hba.conf allows connections to the replication slots.
  #
  # Globals:  PGDATA (read) - postgres data directory.
  # Outputs:  a status line on stdout for the two "nothing to do" cases.
  # Returns:  0 when nothing had to change; otherwise the exit status of
  #           /opt/sentry/init_hba.sh.
  local pg_hba="$PGDATA/pg_hba.conf"

  if [ ! -f "$pg_hba" ]; then
    # Fresh data directory: nothing to patch yet.
    echo "DB not initialized. Postgres will take care of pg_hba"
  elif grep -q -E '^host[[:space:]]+replication' "$pg_hba"; then
    # grep -q makes the decision on the exit status, so a grep error (e.g. an
    # unreadable file) is not mistaken for "rule already present".
    echo "Replication config already present in pg_hba. Not changing anything."
  else
    # Execute the same script we run on DB initialization
    /opt/sentry/init_hba.sh
  fi
}

bind_wal2json() {
  # Copy the wal2json plugin from the /opt/sentry mount into the directory
  # reported by `pg_config --pkglibdir`.
  #
  # Returns: non-zero if pg_config or cp fails.
  local pkglibdir
  # Assigned separately from `local` so a pg_config failure is not masked and
  # cp is never pointed at "/wal2json.so".
  pkglibdir=$(pg_config --pkglibdir) || return
  cp /opt/sentry/wal2json/wal2json.so "$pkglibdir/wal2json.so"
}

echo "Setting up Change Data Capture"

# The init hook is staged for every invocation; the pg_hba and plugin steps
# only apply when the container is asked to run the postgres server itself.
prep_init_db
case "$1" in
  postgres)
    cdc_setup_hba_conf
    bind_wal2json
    ;;
esac

# Hand over to the image's own entrypoint with the original arguments.
exec /docker-entrypoint.sh "$@"
3 changes: 3 additions & 0 deletions scripts/bump-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@ OLD_VERSION="$1"
NEW_VERSION="$2"

# Use the versions given in the environment, falling back to the tag of the
# latest GitHub release of each project.
SYMBOLICATOR_VERSION=${SYMBOLICATOR_VERSION:-$(curl -s "https://api.github.com/repos/getsentry/symbolicator/releases/latest" | grep -Po '"tag_name": "\K.*?(?=")')}
WAL2JSON_VERSION=${WAL2JSON_VERSION:-$(curl -s "https://api.github.com/repos/getsentry/wal2json/releases/latest" | grep -Po '"tag_name": "\K.*?(?=")')}

sed -i -e "s/^SYMBOLICATOR_IMAGE=\([^:]\+\):.\+\$/SYMBOLICATOR_IMAGE=\1:$SYMBOLICATOR_VERSION/" .env
# WAL2JSON_VERSION in .env is a bare version (e.g. "latest"), not an
# image:tag pair, so the whole value is replaced rather than a ":tag" suffix.
sed -i -e "s/^\(WAL2JSON_VERSION\)=.\+\$/\1=$WAL2JSON_VERSION/" .env
sed -i -e "s/^\(SENTRY\|SNUBA\|RELAY\)_IMAGE=\([^:]\+\):.\+\$/\1_IMAGE=\2:$NEW_VERSION/" .env
sed -i -e "s/^\# Self-Hosted Sentry .*/# Self-Hosted Sentry $NEW_VERSION/" README.md
sed -i -e "s/\(Change Date:\s*\)[-0-9]\+\$/\\1$(date +'%Y-%m-%d' -d '3 years')/" LICENSE

echo "New version: $NEW_VERSION"
echo "New Symbolicator version: $SYMBOLICATOR_VERSION"
echo "New wal2json version: $WAL2JSON_VERSION"