Skip to content

Commit

Permalink
Build XGBoostJob example images in CI (kubeflow#1913)
Browse files Browse the repository at this point in the history
* Build XGBoostJob example images in CI

Signed-off-by: Yuki Iwai <yuki.iwai.tz@gmail.com>

* Organize example manifests

Signed-off-by: Yuki Iwai <yuki.iwai.tz@gmail.com>

* Fix action files

Signed-off-by: Yuki Iwai <yuki.iwai.tz@gmail.com>

* Replace image names

Signed-off-by: Yuki Iwai <yuki.iwai.tz@gmail.com>

---------

Signed-off-by: Yuki Iwai <yuki.iwai.tz@gmail.com>
  • Loading branch information
tenzen-y authored Sep 16, 2023
1 parent afba76b commit 3d46a36
Show file tree
Hide file tree
Showing 23 changed files with 105 additions and 254 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/build-and-publish-images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ on:
dockerfile:
required: true
type: string
context:
required: false
type: string
default: .
secrets:
DOCKERHUB_USERNAME:
required: false
Expand Down Expand Up @@ -48,6 +52,7 @@ jobs:
image: docker.io/kubeflow/${{ inputs.component-name }}
dockerfile: ${{ inputs.dockerfile }}
platforms: ${{ inputs.platforms }}
context: ${{ inputs.context }}
push: true

- name: Test Build For Component ${{ inputs.component-name }}
Expand All @@ -57,4 +62,5 @@ jobs:
image: docker.io/kubeflow/${{ inputs.component-name }}
dockerfile: ${{ inputs.dockerfile }}
platforms: ${{ inputs.platforms }}
context: ${{ inputs.context }}
push: false
16 changes: 10 additions & 6 deletions .github/workflows/publish-example-images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ jobs:
# TODO (tenzen-y): Support linux/arm64 platform
platforms: linux/amd64
dockerfile: ${{ matrix.dockerfile }}
context: ${{ matrix.context }}
secrets:
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
Expand All @@ -37,14 +38,17 @@ jobs:
dockerfile: examples/pytorch/elastic/imagenet/Dockerfile
- component-name: pytorch-elastic-example-echo
dockerfile: examples/pytorch/elastic/echo/Dockerfile
- component-name: xgboost-dist-iris
dockerfile: examples/xgboost/xgboost-dist/Dockerfile
context: examples/xgboost/xgboost-dist
- component-name: lightgbm-dist-py-test
dockerfile: examples/xgboost/lightgbm-dist/Dockerfile
context: examples/xgboost/lightgbm-dist
- component-name: xgboost-dist-rabit-test
dockerfile: examples/xgboost/smoke-dist/Dockerfile
context: examples/xgboost/smoke-dist

# TODO (tenzen-y): Fix the below broken Dockerfiles
# - component-name: lightgbm-dist-py-test
# dockerfile: examples/xgboost/lightgbm-dist/Dockerfile
# - component-name: xgboost-dist-rabit-test
# dockerfile: examples/xgboost/smoke-dist/Dockerfile
# - component-name: xgboost-dist-iris
# dockerfile: examples/xgboost/xgboost-dist
# - component-name: mxnet-gpu
# dockerfile: examples/mxnet/train/Dockerfile
# - component-name: mxnet-auto-tuning
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/template-publish-image/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ inputs:
platforms:
required: true
description: e.g, linux/amd64
context:
required: false
default: .
description: e.g, examples/xgboost/xgboost-dist
push:
required: true
description: whether to push container images or not
Expand Down Expand Up @@ -40,7 +44,7 @@ runs:
uses: docker/build-push-action@v3
with:
platforms: ${{ inputs.platforms }}
context: .
context: ${{ inputs.context }}
file: ${{ inputs.dockerfile }}
push: ${{ inputs.push }}
tags: ${{ steps.meta.outputs.tags }}
Expand Down
57 changes: 30 additions & 27 deletions examples/xgboost/lightgbm-dist/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,37 +1,40 @@
FROM ubuntu:16.04
# inspired from https://github.com/microsoft/LightGBM/blob/v4.1.0/docker/dockerfile-cli
FROM ubuntu:20.04 as builder

ARG CONDA_DIR=/opt/conda
ENV PATH $CONDA_DIR/bin:$PATH
ENV \
DEBIAN_FRONTEND=noninteractive \
LANG=C.UTF-8 \
LC_ALL=C.UTF-8

RUN apt-get update && \
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
ca-certificates \
cmake \
build-essential \
gcc \
g++ \
git \
curl && \
# python environment
curl -sL https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -o conda.sh && \
/bin/bash conda.sh -f -b -p $CONDA_DIR && \
export PATH="$CONDA_DIR/bin:$PATH" && \
conda config --set always_yes yes --set changeps1 no && \
# lightgbm
conda install -q -y numpy==1.20.3 scipy==1.6.2 scikit-learn==0.24.2 pandas==1.3.0 && \
git clone --recursive --branch stable --depth 1 https://github.com/Microsoft/LightGBM && \
ca-certificates \
cmake \
build-essential \
gcc \
g++ \
git \
libomp-dev && \
rm -rf /var/lib/apt/lists/*

RUN git clone \
--recursive \
--branch v4.1.0 \
--depth 1 \
https://github.com/Microsoft/LightGBM && \
mkdir LightGBM/build && \
cd LightGBM/build && \
cmake .. && \
make -j4 && \
make install && \
cd ../python-package && \
python setup.py install_lib && \
# clean
apt-get autoremove -y && apt-get clean && \
conda clean -a -y && \
rm -rf /usr/local/src/* && \
rm -rf /LightGBM
cd "${HOME}" && \
rm -rf LightGBM

FROM python:3.7

COPY requirements.txt .
RUN pip install -r requirements.txt
COPY --from=builder /usr/local/bin/lightgbm /usr/local/bin/lightgbm

WORKDIR /app

Expand All @@ -41,4 +44,4 @@ ADD https://raw.githubusercontent.com/microsoft/LightGBM/stable/examples/paralle
ADD https://raw.githubusercontent.com/microsoft/LightGBM/stable/examples/parallel_learning/binary.test data/.
COPY *.py ./

ENTRYPOINT [ "python", "/app/main.py" ]
ENTRYPOINT [ "python", "/app/main.py" ]
9 changes: 1 addition & 8 deletions examples/xgboost/lightgbm-dist/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,6 @@ This folder containers Dockerfile and Python scripts to run a distributed Lightg
The code is based on this [example](https://github.com/microsoft/LightGBM/tree/master/examples/parallel_learning) in the official GitHub repository of the library.


**Build image**
The default image name and tag are `kubeflow/lightgbm-dist-py-test` and `1.0`, respectively.

```shell
docker build -f Dockerfile -t kubeflow/lightgbm-dist-py-test:1.0 ./
```

**Start the training**

```
Expand All @@ -24,7 +17,7 @@ kubectl create -f xgboostjob_v1_lightgbm_dist_training.yaml
Here is sample output when the job is running. The output looks like this:

```
apiVersion: xgboostjob.kubeflow.org/v1
apiVersion: kubeflow.org/v1
kind: XGBoostJob
metadata:
annotations:
Expand Down
4 changes: 4 additions & 0 deletions examples/xgboost/lightgbm-dist/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
numpy==1.20.3
scipy==1.6.2
scikit-learn==0.24.2
pandas==1.3.0
14 changes: 1 addition & 13 deletions examples/xgboost/smoke-dist/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,26 +1,14 @@
# Install python 3.6
FROM python:3.6

RUN apt-get update
RUN apt-get install -y git make g++ cmake

RUN mkdir -p /opt/mlkube

# Download the rabit tracker and xgboost code.

COPY tracker.py /opt/mlkube/
COPY xgboost_smoke_test.py /opt/mlkube/
COPY requirements.txt /opt/mlkube/

# Install requirements

RUN pip install -r /opt/mlkube/requirements.txt

# Build XGBoost.
RUN git clone --recursive https://github.com/dmlc/xgboost && \
cd xgboost && \
make -j$(nproc) && \
cd python-package; python setup.py install

COPY xgboost_smoke_test.py /opt/mlkube/

ENTRYPOINT ["python", "/opt/mlkube/xgboost_smoke_test.py"]
15 changes: 3 additions & 12 deletions examples/xgboost/smoke-dist/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,6 @@

This folder contains a Dockerfile and a distributed send/recv test.

**Build Image**

The default image name and tag is `kubeflow/xgboost-dist-rabit-test:1.2`.
You can build the image based on your requirement.

```shell
docker build -f Dockerfile -t kubeflow/xgboost-dist-rabit-test:1.2 ./
```

**Start and test XGBoost Rabit tracker**

Expand All @@ -23,15 +15,14 @@ kubectl create -f xgboostjob_v1alpha1_rabit_test.yaml
```
Here is sample output when the job is running. The output looks like this:
```
apiVersion: xgboostjob.kubeflow.org/v1alpha1
apiVersion: kubeflow.org/v1
kind: XGBoostJob
metadata:
creationTimestamp: "2019-06-21T03:32:57Z"
generation: 7
name: xgboost-dist-test
namespace: default
resourceVersion: "258466"
selfLink: /apis/xgboostjob.kubeflow.org/v1alpha1/namespaces/default/xgboostjobs/xgboost-dist-test
uid: 431dc182-93d5-11e9-bbab-080027dfbfe2
spec:
RunPolicy:
Expand All @@ -45,7 +36,7 @@ spec:
creationTimestamp: null
spec:
containers:
- image: docker.io/merlintang/xgboost-dist-rabit-test:1.2
- image: docker.io/kubeflow/xgboost-dist-rabit-test:latest
imagePullPolicy: Always
name: xgboostjob
ports:
Expand All @@ -60,7 +51,7 @@ spec:
creationTimestamp: null
spec:
containers:
- image: docker.io/merlintang/xgboost-dist-rabit-test:1.2
- image: docker.io/kubeflow/xgboost-dist-rabit-test:latest
imagePullPolicy: Always
name: xgboostjob
ports:
Expand Down
1 change: 1 addition & 0 deletions examples/xgboost/smoke-dist/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ Cython>=0.29.4
requests>=2.21.0
urllib3>=1.21.1
scipy>=1.4.1
xgboost==1.5.2
4 changes: 2 additions & 2 deletions examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ spec:
spec:
containers:
- name: xgboost
image: docker.io/merlintang/xgboost-dist-rabit-test:1.2
image: docker.io/kubeflow/xgboost-dist-rabit-test:latest
ports:
- containerPort: 9991
name: xgboostjob-port
Expand All @@ -23,7 +23,7 @@ spec:
spec:
containers:
- name: xgboost
image: docker.io/merlintang/xgboost-dist-rabit-test:1.2
image: docker.io/kubeflow/xgboost-dist-rabit-test:latest
ports:
- containerPort: 9991
name: xgboostjob-port
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ spec:
spec:
containers:
- name: xgboost
image: docker.io/merlintang/xgboost-dist-rabit-test:1.2
image: docker.io/kubeflow/xgboost-dist-rabit-test:latest
ports:
- containerPort: 9991
name: xgboostjob-port
Expand All @@ -27,7 +27,7 @@ spec:
spec:
containers:
- name: xgboost
image: docker.io/merlintang/xgboost-dist-rabit-test:1.2
image: docker.io/kubeflow/xgboost-dist-rabit-test:latest
ports:
- containerPort: 9991
name: xgboostjob-port
Expand Down
17 changes: 2 additions & 15 deletions examples/xgboost/xgboost-dist/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,25 +1,12 @@
# Install python 36.
# Install python 3.6.
FROM python:3.6

RUN apt-get update
RUN apt-get install -y git make g++ cmake

RUN mkdir -p /opt/mlkube

# Download the rabit tracker and xgboost code.

COPY requirements.txt /opt/mlkube/

# Install requirements

COPY requirements.txt /opt/mlkube/
RUN pip install -r /opt/mlkube/requirements.txt

# Build XGBoost.
RUN git clone --recursive https://github.com/dmlc/xgboost && \
cd xgboost && \
make -j$(nproc) && \
cd python-package; python setup.py install

COPY *.py /opt/mlkube/

ENTRYPOINT ["python", "/opt/mlkube/main.py"]
Loading

0 comments on commit 3d46a36

Please sign in to comment.