From def3d7a3c081d1525ae929edfde967940e5f525d Mon Sep 17 00:00:00 2001
From: Hazal <HazalCiplak@users.noreply.github.com>
Date: Thu, 12 May 2022 13:58:24 +0100
Subject: [PATCH] Remove dask dependency  (#570)

* removed dask from requirements

* updated postgres healthcheck user to airflow

* added CeleryExecutor, removed DaskExecutor

* removed dask services from docker-compose

* removed worker.sh

* removed dask env vars

* update makefile
---
 Dockerfile.airflow          |  3 --
 Makefile                    | 12 ++------
 docker-compose.override.yml |  2 +-
 docker-compose.yml          | 55 +++++++++++++++++++++----------------
 requirements.dag.txt        |  2 --
 worker.sh                   |  9 ------
 6 files changed, 34 insertions(+), 49 deletions(-)
 delete mode 100644 worker.sh

diff --git a/Dockerfile.airflow b/Dockerfile.airflow
index d877bf73..7d6fb622 100644
--- a/Dockerfile.airflow
+++ b/Dockerfile.airflow
@@ -51,9 +51,6 @@ COPY --chown=airflow:airflow run_test.sh ./
 
 RUN if [ "${install_dev}" = "y" ]; then chmod +x run_test.sh; fi
 
-COPY --chown=airflow:airflow worker.sh ./
-RUN chmod +x worker.sh
-
 COPY notebooks/example.ipynb ./applications_file_directory/notebooks/example.ipynb
 
 RUN mkdir -p $AIRFLOW_HOME/serve
diff --git a/Makefile b/Makefile
index 578e3001..5f27079c 100644
--- a/Makefile
+++ b/Makefile
@@ -245,20 +245,12 @@ airflow-scheduler-exec:
 	$(AIRFLOW_DOCKER_COMPOSE) exec scheduler bash
 
 
-airflow-dask-worker-shell:
-	$(AIRFLOW_DOCKER_COMPOSE) run --rm dask-worker bash
-
-
-airflow-dask-worker-exec:
-	$(AIRFLOW_DOCKER_COMPOSE) exec dask-worker bash
-
-
 airflow-logs:
-	$(AIRFLOW_DOCKER_COMPOSE) logs -f scheduler webserver dask-worker
+	$(AIRFLOW_DOCKER_COMPOSE) logs -f scheduler webserver worker
 
 
 airflow-start:
-	$(AIRFLOW_DOCKER_COMPOSE) up -d --scale dask-worker=1 scheduler
+	$(AIRFLOW_DOCKER_COMPOSE) up worker webserver flower
 	$(MAKE) airflow-print-url
 
 
diff --git a/docker-compose.override.yml b/docker-compose.override.yml
index 101b58e5..2380094c 100644
--- a/docker-compose.override.yml
+++ b/docker-compose.override.yml
@@ -19,7 +19,7 @@ services:
   scheduler:
     volumes: *airflow-volumes
 
-  dask-worker:
+  worker:
     environment:
       - DEPLOYMENT_ENV=dev
     volumes: *airflow-volumes
diff --git a/docker-compose.yml b/docker-compose.yml
index a8ea95dd..2d36e04b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -6,10 +6,11 @@ x-airflow-env:
   - LOAD_EX=n
   - AIRFLOW_HOST=webserver
   - AIRFLOW_PORT=8080
-  - AIRFLOW__CORE__EXECUTOR=DaskExecutor
+  - AIRFLOW__CORE__EXECUTOR=CeleryExecutor
+  - AIRFLOW__CELERY__BROKER_URL=redis://redis:6379/1
+  - AIRFLOW__CELERY__RESULT_BACKEND=db+postgresql://airflow:airflow@postgres:5432/airflow
   - AIRFLOW__CORE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@postgres:5432/airflow
   - AIRFLOW__CORE__FERNET_KEY='81HqDtbqAywKSOumSha3BhWNOdQ26slT6K0YaZeZyPs='
-  - AIRFLOW__DASK__CLUSTER_ADDRESS=dask-scheduler:8786
   - AIRFLOW__API__AUTH_BACKEND=airflow.api.auth.backend.default
   - DEPLOYMENT_ENV=ci
   - GOOGLE_APPLICATION_CREDENTIALS=/home/airflow/.config/gcloud/credentials.json
@@ -54,9 +55,7 @@ services:
 
   webserver:
     depends_on:
-      - postgres
-      - dask-worker
-      - dask-scheduler
+      - worker
     environment: *airflow-env
     image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
     entrypoint: /entrypoint
@@ -65,7 +64,7 @@ services:
   scheduler:
     image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
     depends_on:
-      - webserver
+      - postgres
     environment: *airflow-env
     entrypoint: /entrypoint
     command: scheduler
@@ -74,10 +73,24 @@ services:
     image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
     depends_on:
       - scheduler
+      - webserver
     environment: *airflow-env
     command: >
       bash -c "sudo install -D /tmp/credentials.json -m 644 -t  /home/airflow/.config/gcloud
       && ./run_test.sh with-end-to-end"
+
+  worker:
+    environment: *airflow-env
+    depends_on:
+      - redis
+      - scheduler
+    image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
+    entrypoint: /entrypoint
+    hostname: worker
+    command: >
+        bash -c "sudo install -D /tmp/credentials.json -m 644 -t /home/airflow/.config/gcloud
+        && sudo install -D /tmp/.aws-credentials -m 644 --no-target-directory /home/airflow/.aws/credentials
+        && airflow worker"
 
   postgres:
     image: postgres:9.6
@@ -86,29 +99,23 @@ services:
       - POSTGRES_PASSWORD=airflow
       - POSTGRES_DB=airflow
     healthcheck:
-      test: ["CMD-SHELL", "pg_isready -U postgres"]
+      test: ["CMD-SHELL", "pg_isready -U airflow"]
       interval: 5s
       timeout: 5s
       retries: 5
+  redis:
+    image: redis:5.0.5
+    environment:
+        - ALLOW_EMPTY_PASSWORD=yes
 
-  dask-scheduler:
-    environment: *airflow-env
-    image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
-    hostname: dask-scheduler
-    entrypoint: [ ]
-    command: ["dask-scheduler"]
-
-  dask-worker:
-    environment: *airflow-env
+  flower:
+    image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
     depends_on:
-      - dask-scheduler
-    image: elifesciences/data-science-airflow-dag:${IMAGE_TAG}
-    hostname: dask-worker
-    entrypoint: []
-    command: >
-      bash -c "sudo install -D /tmp/credentials.json -m 644 -t /home/airflow/.config/gcloud
-      && sudo install -D /tmp/.aws-credentials -m 644 --no-target-directory /home/airflow/.aws/credentials
-      && ./worker.sh tcp://dask-scheduler:8786"
+        - redis
+    environment: *airflow-env
+    ports:
+        - "5555:5555"
+    command: celery flower
 
   peerscout-api:
     build:
diff --git a/requirements.dag.txt b/requirements.dag.txt
index 5a79c7c8..1f25eadc 100644
--- a/requirements.dag.txt
+++ b/requirements.dag.txt
@@ -1,6 +1,4 @@
 apache-airflow[crypto,celery,postgres,jdbc,ssh]==1.10.15
-dask[complete]<=2021.2.0, >=2.17.0
-distributed<=2021.2.0, >=2.17.0
 papermill==2.3.3
 click==7.1.2
 ansiwrap==0.8.4
diff --git a/worker.sh b/worker.sh
deleted file mode 100644
index 383b53fc..00000000
--- a/worker.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-set -e
-
-cd $AIRFLOW_HOME/serve
-python3 -m http.server 8793 &
-
-cd -
-dask-worker $@