diff --git a/Dockerfile.airflow b/Dockerfile.airflow
index d877bf73..7d6fb622 100644
--- a/Dockerfile.airflow
+++ b/Dockerfile.airflow
@@ -51,9 +51,6 @@ COPY --chown=airflow:airflow run_test.sh ./
 RUN if [ "${install_dev}" = "y" ]; then chmod +x run_test.sh; fi
 
-COPY --chown=airflow:airflow worker.sh ./
-RUN chmod +x worker.sh
-
 COPY notebooks/example.ipynb ./applications_file_directory/notebooks/example.ipynb
 
 RUN mkdir -p $AIRFLOW_HOME/serve
 
diff --git a/Makefile b/Makefile
index 578e3001..5f27079c 100644
--- a/Makefile
+++ b/Makefile
@@ -245,20 +245,12 @@ airflow-scheduler-exec:
 	$(AIRFLOW_DOCKER_COMPOSE) exec scheduler bash
 
 
-airflow-dask-worker-shell:
-	$(AIRFLOW_DOCKER_COMPOSE) run --rm dask-worker bash
-
-
-airflow-dask-worker-exec:
-	$(AIRFLOW_DOCKER_COMPOSE) exec dask-worker bash
-
-
 airflow-logs:
-	$(AIRFLOW_DOCKER_COMPOSE) logs -f scheduler webserver dask-worker
+	$(AIRFLOW_DOCKER_COMPOSE) logs -f scheduler webserver worker
 
 
 airflow-start:
-	$(AIRFLOW_DOCKER_COMPOSE) up -d --scale dask-worker=1 scheduler
+	$(AIRFLOW_DOCKER_COMPOSE) up worker webserver flower
 	$(MAKE) airflow-print-url
 
 
diff --git a/docker-compose.override.yml b/docker-compose.override.yml
index 101b58e5..2380094c 100644
--- a/docker-compose.override.yml
+++ b/docker-compose.override.yml
@@ -19,7 +19,7 @@ services:
     scheduler:
         volumes: *airflow-volumes
 
-    dask-worker:
+    worker:
         environment:
             - DEPLOYMENT_ENV=dev
         volumes: *airflow-volumes
diff --git a/docker-compose.yml b/docker-compose.yml
index a8ea95dd..2d36e04b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -6,10 +6,11 @@ x-airflow-env:
     - LOAD_EX=n
     - AIRFLOW_HOST=webserver
     - AIRFLOW_PORT=8080
-    - AIRFLOW__CORE__EXECUTOR=DaskExecutor
+    - AIRFLOW__CORE__EXECUTOR=CeleryExecutor
+    - AIRFLOW__CELERY__BROKER_URL=redis://redis:6379/1
+    - AIRFLOW__CELERY__RESULT_BACKEND=db+postgresql://airflow:airflow@postgres:5432/airflow
     - AIRFLOW__CORE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres:5432/airflow
     - AIRFLOW__CORE__FERNET_KEY='81HqDtbqAywKSOumSha3BhWNOdQ26slT6K0YaZeZyPs='
-    - AIRFLOW__DASK__CLUSTER_ADDRESS=dask-scheduler:8786
     - AIRFLOW__API__AUTH_BACKEND=airflow.api.auth.backend.default
     - DEPLOYMENT_ENV=ci
     - GOOGLE_APPLICATION_CREDENTIALS=/home/airflow/.config/gcloud/credentials.json
@@ -54,9 +55,7 @@ services:
 
     webserver:
         depends_on:
-            - postgres
-            - dask-worker
-            - dask-scheduler
+            - worker
         environment: *airflow-env
         image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
         entrypoint: /entrypoint
@@ -65,7 +64,7 @@
     scheduler:
         image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
         depends_on:
-            - webserver
+            - postgres
         environment: *airflow-env
         entrypoint: /entrypoint
         command: scheduler
@@ -74,10 +73,24 @@
         image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
         depends_on:
             - scheduler
+            - webserver
         environment: *airflow-env
         command: >
             bash -c "sudo install -D /tmp/credentials.json -m 644 -t /home/airflow/.config/gcloud
             && ./run_test.sh with-end-to-end"
+
+    worker:
+        environment: *airflow-env
+        depends_on:
+            - redis
+            - scheduler
+        image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
+        entrypoint: /entrypoint
+        hostname: worker
+        command: >
+            bash -c "sudo install -D /tmp/credentials.json -m 644 -t /home/airflow/.config/gcloud
+            && sudo install -D /tmp/.aws-credentials -m 644 --no-target-directory /home/airflow/.aws/credentials
+            && airflow worker"
 
     postgres:
         image: postgres:9.6
@@ -84,30 +97,24 @@
         environment:
             - POSTGRES_PASSWORD=airflow
             - POSTGRES_DB=airflow
         healthcheck:
-            test: ["CMD-SHELL", "pg_isready -U postgres"]
+            test: ["CMD-SHELL", "pg_isready -U airflow"]
             interval: 5s
             timeout: 5s
             retries: 5
 
+    redis:
+        image: redis:5.0.5
+        environment:
+            - ALLOW_EMPTY_PASSWORD=yes
-    dask-scheduler:
-        environment: *airflow-env
-        image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
-        hostname: dask-scheduler
-        entrypoint: [ ]
-        command: ["dask-scheduler"]
-
-    dask-worker:
-        environment: *airflow-env
+    flower:
+        image: elifesciences/data-hub-ejp-xml-pipeline-dev
         depends_on:
-            - dask-scheduler
-        image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
-        hostname: dask-worker
-        entrypoint: []
-        command: >
-            bash -c "sudo install -D /tmp/credentials.json -m 644 -t /home/airflow/.config/gcloud
-            && sudo install -D /tmp/.aws-credentials -m 644 --no-target-directory /home/airflow/.aws/credentials
-            && ./worker.sh tcp://dask-scheduler:8786"
+            - redis
+        environment: *airflow-env
+        ports:
+            - "5555:5555"
+        command: celery flower
 
     peerscout-api:
         build:
diff --git a/requirements.dag.txt b/requirements.dag.txt
index 5a79c7c8..1f25eadc 100644
--- a/requirements.dag.txt
+++ b/requirements.dag.txt
@@ -1,6 +1,4 @@
 apache-airflow[crypto,celery,postgres,jdbc,ssh]==1.10.15
-dask[complete]<=2021.2.0, >=2.17.0
-distributed<=2021.2.0, >=2.17.0
 papermill==2.3.3
 click==7.1.2
 ansiwrap==0.8.4
diff --git a/worker.sh b/worker.sh
deleted file mode 100644
index 383b53fc..00000000
--- a/worker.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-set -e
-
-cd $AIRFLOW_HOME/serve
-python3 -m http.server 8793 &
-
-cd -
-
-dask-worker $@