Skip to content

Commit

Permalink
Remove dask dependency (#570)
Browse files Browse the repository at this point in the history
* removed dask from requirements

* updated postgres healthcheck user to airflow

* added CeleryExecutor, removed DaskExecutor

* removed dask services from docker-compose

* removed worker.sh

* removed dask env vars

* updated Makefile
  • Loading branch information
HazalCiplak authored May 12, 2022
1 parent f45c4e1 commit def3d7a
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 49 deletions.
3 changes: 0 additions & 3 deletions Dockerfile.airflow
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,6 @@ COPY --chown=airflow:airflow run_test.sh ./

RUN if [ "${install_dev}" = "y" ]; then chmod +x run_test.sh; fi

COPY --chown=airflow:airflow worker.sh ./
RUN chmod +x worker.sh

COPY notebooks/example.ipynb ./applications_file_directory/notebooks/example.ipynb

RUN mkdir -p $AIRFLOW_HOME/serve
Expand Down
12 changes: 2 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -245,20 +245,12 @@ airflow-scheduler-exec:
$(AIRFLOW_DOCKER_COMPOSE) exec scheduler bash


airflow-dask-worker-shell:
$(AIRFLOW_DOCKER_COMPOSE) run --rm dask-worker bash


airflow-dask-worker-exec:
$(AIRFLOW_DOCKER_COMPOSE) exec dask-worker bash


airflow-logs:
$(AIRFLOW_DOCKER_COMPOSE) logs -f scheduler webserver dask-worker
$(AIRFLOW_DOCKER_COMPOSE) logs -f scheduler webserver worker


airflow-start:
$(AIRFLOW_DOCKER_COMPOSE) up -d --scale dask-worker=1 scheduler
$(AIRFLOW_DOCKER_COMPOSE) up worker webserver flower
$(MAKE) airflow-print-url


Expand Down
2 changes: 1 addition & 1 deletion docker-compose.override.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ services:
scheduler:
volumes: *airflow-volumes

dask-worker:
worker:
environment:
- DEPLOYMENT_ENV=dev
volumes: *airflow-volumes
Expand Down
55 changes: 31 additions & 24 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ x-airflow-env:
- LOAD_EX=n
- AIRFLOW_HOST=webserver
- AIRFLOW_PORT=8080
- AIRFLOW__CORE__EXECUTOR=DaskExecutor
- AIRFLOW__CORE__EXECUTOR=CeleryExecutor
- AIRFLOW__CELERY__BROKER_URL=redis://redis:6379/1
- AIRFLOW__CELERY__RESULT_BACKEND=db+postgresql://airflow:airflow@postgres:5432/airflow
- AIRFLOW__CORE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres:5432/airflow
- AIRFLOW__CORE__FERNET_KEY='81HqDtbqAywKSOumSha3BhWNOdQ26slT6K0YaZeZyPs='
- AIRFLOW__DASK__CLUSTER_ADDRESS=dask-scheduler:8786
- AIRFLOW__API__AUTH_BACKEND=airflow.api.auth.backend.default
- DEPLOYMENT_ENV=ci
- GOOGLE_APPLICATION_CREDENTIALS=/home/airflow/.config/gcloud/credentials.json
Expand Down Expand Up @@ -54,9 +55,7 @@ services:

webserver:
depends_on:
- postgres
- dask-worker
- dask-scheduler
- worker
environment: *airflow-env
image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
entrypoint: /entrypoint
Expand All @@ -65,7 +64,7 @@ services:
scheduler:
image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
depends_on:
- webserver
- postgres
environment: *airflow-env
entrypoint: /entrypoint
command: scheduler
Expand All @@ -74,10 +73,24 @@ services:
image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
depends_on:
- scheduler
- webserver
environment: *airflow-env
command: >
bash -c "sudo install -D /tmp/credentials.json -m 644 -t /home/airflow/.config/gcloud
&& ./run_test.sh with-end-to-end"
worker:
environment: *airflow-env
depends_on:
- redis
- scheduler
image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
entrypoint: /entrypoint
hostname: worker
command: >
bash -c "sudo install -D /tmp/credentials.json -m 644 -t /home/airflow/.config/gcloud
&& sudo install -D /tmp/.aws-credentials -m 644 --no-target-directory /home/airflow/.aws/credentials
&& airflow worker"
postgres:
image: postgres:9.6
Expand All @@ -86,29 +99,23 @@ services:
- POSTGRES_PASSWORD=airflow
- POSTGRES_DB=airflow
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
test: ["CMD-SHELL", "pg_isready -U airflow"]
interval: 5s
timeout: 5s
retries: 5
redis:
image: redis:5.0.5
environment:
- ALLOW_EMPTY_PASSWORD=yes

dask-scheduler:
environment: *airflow-env
image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
hostname: dask-scheduler
entrypoint: [ ]
command: ["dask-scheduler"]

dask-worker:
environment: *airflow-env
flower:
image: elifesciences/data-hub-ejp-xml-pipeline-dev
depends_on:
- dask-scheduler
image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
hostname: dask-worker
entrypoint: []
command: >
bash -c "sudo install -D /tmp/credentials.json -m 644 -t /home/airflow/.config/gcloud
&& sudo install -D /tmp/.aws-credentials -m 644 --no-target-directory /home/airflow/.aws/credentials
&& ./worker.sh tcp://dask-scheduler:8786"
- redis
environment: *airflow-env
ports:
- "5555:5555"
command: celery flower

peerscout-api:
build:
Expand Down
2 changes: 0 additions & 2 deletions requirements.dag.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
apache-airflow[crypto,celery,postgres,jdbc,ssh]==1.10.15
dask[complete]<=2021.2.0, >=2.17.0
distributed<=2021.2.0, >=2.17.0
papermill==2.3.3
click==7.1.2
ansiwrap==0.8.4
Expand Down
9 changes: 0 additions & 9 deletions worker.sh

This file was deleted.

0 comments on commit def3d7a

Please sign in to comment.