add spark master and worker image build
thanh-nguyen-dang committed Jul 1, 2024
1 parent af235d9 commit 4acc114
Showing 3 changed files with 33 additions and 3 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/image_build_and_test.yaml
@@ -12,6 +12,36 @@ jobs:
       ECR_AWS_SECRET_ACCESS_KEY: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }}
       QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }}
       QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }}
+  build-master:
+    name: spark master
+    uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master
+    needs: [build-spark-base]
+    with:
+      OVERRIDE_REPO_NAME: spark-master
+      OVERRIDE_TAG_NAME: 3.3.0-hadoop3.3
+      DOCKERFILE_LOCATION: "./dockers/spark/master/Dockerfile"
+      DOCKERFILE_BUILD_CONTEXT: "./spark/master"
+      USE_QUAY_ONLY: true
+    secrets:
+      ECR_AWS_ACCESS_KEY_ID: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }}
+      ECR_AWS_SECRET_ACCESS_KEY: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }}
+      QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }}
+      QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }}
+  build-worker:
+    name: spark worker
+    uses: uc-cdis/.github/.github/workflows/image_build_push.yaml@master
+    needs: [build-spark-base]
+    with:
+      OVERRIDE_REPO_NAME: spark-worker
+      OVERRIDE_TAG_NAME: 3.3.0-hadoop3.3
+      DOCKERFILE_LOCATION: "./dockers/spark/worker/Dockerfile"
+      DOCKERFILE_BUILD_CONTEXT: "./spark/worker"
+      USE_QUAY_ONLY: true
+    secrets:
+      ECR_AWS_ACCESS_KEY_ID: ${{ secrets.ECR_AWS_ACCESS_KEY_ID }}
+      ECR_AWS_SECRET_ACCESS_KEY: ${{ secrets.ECR_AWS_SECRET_ACCESS_KEY }}
+      QUAY_USERNAME: ${{ secrets.QUAY_USERNAME }}
+      QUAY_ROBOT_TOKEN: ${{ secrets.QUAY_ROBOT_TOKEN }}
 
   # then run the tests
   test:
4 changes: 1 addition & 3 deletions tube/settings.py
@@ -89,7 +89,5 @@
 
 os.environ[
     "PYSPARK_SUBMIT_ARGS"
-] = "--jars {}/dist/elasticsearch-spark-20_2.11-{}.jar pyspark-shell".format(
-    ES_HADOOP_HOME_BIN, ES_HADOOP_VERSION
-)
+] = f"--jars {ES_HADOOP_HOME_BIN}/dist/elasticsearch-spark-20_2.11-{ES_HADOOP_VERSION}.jar pyspark-shell"
 os.environ["HADOOP_CLIENT_OPTS"] = os.getenv("HADOOP_CLIENT_OPTS", "")
2 changes: 2 additions & 0 deletions tube/utils/spark.py
@@ -21,6 +21,8 @@ def make_spark_context(tube_config):
         .set("spark.executor.memory", tube_config.SPARK_EXECUTOR_MEMORY)
         .set("spark.driver.memory", tube_config.SPARK_DRIVER_MEMORY)
         .set("spark.python.profile", "false")
+        .set("spark.executor.extraJavaOptions", "-Dlog4j.configuration=file:/spark/conf/log4j.properties")
+        .set("spark.submit.pyFiles", "/tube")
         .setAppName(config.APP_NAME)
     )
     if tube_config.RUNNING_MODE == enums.RUNNING_MODE_DEV:
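The two added conf options appear meant to pair with the new images: the first points every executor JVM at a log4j configuration expected at /spark/conf/log4j.properties inside the containers, and the second ships the tube sources to executors before jobs run. A minimal sketch of the conf that make_spark_context assembles, with placeholder memory sizes and app name standing in for the tube_config and config.APP_NAME values:

from pyspark import SparkConf

# A sketch only: "2g", "1g", and "tube" are placeholders, not tube's real
# settings; the two paths mirror this commit and assume the image layout.
conf = (
    SparkConf()
    .set("spark.executor.memory", "2g")
    .set("spark.driver.memory", "1g")
    .set("spark.python.profile", "false")
    # Added in this commit: load the log4j config shipped in the Spark images.
    .set(
        "spark.executor.extraJavaOptions",
        "-Dlog4j.configuration=file:/spark/conf/log4j.properties",
    )
    # Added in this commit: distribute the tube code to executors.
    .set("spark.submit.pyFiles", "/tube")
    .setAppName("tube")
)
print(conf.toDebugString())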
