Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build XGBoostJob example images in CI #1913

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/build-and-publish-images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ on:
dockerfile:
required: true
type: string
context:
required: false
type: string
default: .
secrets:
DOCKERHUB_USERNAME:
required: false
Expand Down Expand Up @@ -48,6 +52,7 @@ jobs:
image: docker.io/kubeflow/${{ inputs.component-name }}
dockerfile: ${{ inputs.dockerfile }}
platforms: ${{ inputs.platforms }}
context: ${{ inputs.context }}
push: true

- name: Test Build For Component ${{ inputs.component-name }}
Expand All @@ -57,4 +62,5 @@ jobs:
image: docker.io/kubeflow/${{ inputs.component-name }}
dockerfile: ${{ inputs.dockerfile }}
platforms: ${{ inputs.platforms }}
context: ${{ inputs.context }}
push: false
16 changes: 10 additions & 6 deletions .github/workflows/publish-example-images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ jobs:
# TODO (tenzen-y): Support linux/arm64 platform
platforms: linux/amd64
dockerfile: ${{ matrix.dockerfile }}
context: ${{ matrix.context }}
secrets:
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
Expand All @@ -37,14 +38,17 @@ jobs:
dockerfile: examples/pytorch/elastic/imagenet/Dockerfile
- component-name: pytorch-elastic-example-echo
dockerfile: examples/pytorch/elastic/echo/Dockerfile
- component-name: xgboost-dist-iris
dockerfile: examples/xgboost/xgboost-dist/Dockerfile
context: examples/xgboost/xgboost-dist
- component-name: lightgbm-dist-py-test
dockerfile: examples/xgboost/lightgbm-dist/Dockerfile
context: examples/xgboost/lightgbm-dist
- component-name: xgboost-dist-rabit-test
dockerfile: examples/xgboost/smoke-dist/Dockerfile
context: examples/xgboost/smoke-dist

# TODO (tenzen-y): Fix the below broken Dockerfiles
# - component-name: lightgbm-dist-py-test
# dockerfile: examples/xgboost/lightgbm-dist/Dockerfile
# - component-name: xgboost-dist-rabit-test
# dockerfile: examples/xgboost/smoke-dist/Dockerfile
# - component-name: xgboost-dist-iris
# dockerfile: examples/xgboost/xgboost-dist
# - component-name: mxnet-gpu
# dockerfile: examples/mxnet/train/Dockerfile
# - component-name: mxnet-auto-tuning
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/template-publish-image/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ inputs:
platforms:
required: true
description: e.g, linux/amd64
context:
required: false
default: .
description: e.g, examples/xgboost/xgboost-dist
push:
required: true
description: whether to push container images or not
Expand Down Expand Up @@ -40,7 +44,7 @@ runs:
uses: docker/build-push-action@v3
with:
platforms: ${{ inputs.platforms }}
context: .
context: ${{ inputs.context }}
file: ${{ inputs.dockerfile }}
push: ${{ inputs.push }}
tags: ${{ steps.meta.outputs.tags }}
Expand Down
57 changes: 30 additions & 27 deletions examples/xgboost/lightgbm-dist/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,37 +1,40 @@
FROM ubuntu:16.04
# inspired from https://github.com/microsoft/LightGBM/blob/v4.1.0/docker/dockerfile-cli
FROM ubuntu:20.04 as builder

ARG CONDA_DIR=/opt/conda
ENV PATH $CONDA_DIR/bin:$PATH
ENV \
DEBIAN_FRONTEND=noninteractive \
LANG=C.UTF-8 \
LC_ALL=C.UTF-8

RUN apt-get update && \
RUN apt-get update -y && \
apt-get install -y --no-install-recommends \
ca-certificates \
cmake \
build-essential \
gcc \
g++ \
git \
curl && \
# python environment
curl -sL https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -o conda.sh && \
/bin/bash conda.sh -f -b -p $CONDA_DIR && \
export PATH="$CONDA_DIR/bin:$PATH" && \
conda config --set always_yes yes --set changeps1 no && \
# lightgbm
conda install -q -y numpy==1.20.3 scipy==1.6.2 scikit-learn==0.24.2 pandas==1.3.0 && \
git clone --recursive --branch stable --depth 1 https://github.com/Microsoft/LightGBM && \
ca-certificates \
cmake \
build-essential \
gcc \
g++ \
git \
libomp-dev && \
rm -rf /var/lib/apt/lists/*

RUN git clone \
--recursive \
--branch v4.1.0 \
--depth 1 \
https://github.com/Microsoft/LightGBM && \
mkdir LightGBM/build && \
cd LightGBM/build && \
cmake .. && \
make -j4 && \
make install && \
cd ../python-package && \
python setup.py install_lib && \
# clean
apt-get autoremove -y && apt-get clean && \
conda clean -a -y && \
rm -rf /usr/local/src/* && \
rm -rf /LightGBM
cd "${HOME}" && \
rm -rf LightGBM

FROM python:3.7

COPY requirements.txt .
RUN pip install -r requirements.txt
COPY --from=builder /usr/local/bin/lightgbm /usr/local/bin/lightgbm

WORKDIR /app

Expand All @@ -41,4 +44,4 @@ ADD https://raw.githubusercontent.com/microsoft/LightGBM/stable/examples/paralle
ADD https://raw.githubusercontent.com/microsoft/LightGBM/stable/examples/parallel_learning/binary.test data/.
COPY *.py ./

ENTRYPOINT [ "python", "/app/main.py" ]
ENTRYPOINT [ "python", "/app/main.py" ]
9 changes: 1 addition & 8 deletions examples/xgboost/lightgbm-dist/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,6 @@ This folder containers Dockerfile and Python scripts to run a distributed Lightg
The code is based on this [example](https://github.com/microsoft/LightGBM/tree/master/examples/parallel_learning) in the official GitHub repository of the library.


**Build image**
The default image name and tag are `kubeflow/lightgbm-dist-py-test` and `1.0`, respectively.

```shell
docker build -f Dockerfile -t kubeflow/lightgbm-dist-py-test:1.0 ./
```

**Start the training**

```
Expand All @@ -24,7 +17,7 @@ kubectl create -f xgboostjob_v1_lightgbm_dist_training.yaml
Here is sample output when the job is running. The output looks like this:

```
apiVersion: xgboostjob.kubeflow.org/v1
apiVersion: kubeflow.org/v1
kind: XGBoostJob
metadata:
annotations:
Expand Down
4 changes: 4 additions & 0 deletions examples/xgboost/lightgbm-dist/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
numpy==1.20.3
scipy==1.6.2
scikit-learn==0.24.2
pandas==1.3.0
14 changes: 1 addition & 13 deletions examples/xgboost/smoke-dist/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,26 +1,14 @@
# Install python 3.6
FROM python:3.6

RUN apt-get update
RUN apt-get install -y git make g++ cmake

RUN mkdir -p /opt/mlkube

# Download the rabit tracker and xgboost code.

COPY tracker.py /opt/mlkube/
COPY xgboost_smoke_test.py /opt/mlkube/
COPY requirements.txt /opt/mlkube/

# Install requirements

RUN pip install -r /opt/mlkube/requirements.txt

# Build XGBoost.
RUN git clone --recursive https://github.com/dmlc/xgboost && \
cd xgboost && \
make -j$(nproc) && \
cd python-package; python setup.py install

COPY xgboost_smoke_test.py /opt/mlkube/

ENTRYPOINT ["python", "/opt/mlkube/xgboost_smoke_test.py"]
15 changes: 3 additions & 12 deletions examples/xgboost/smoke-dist/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,6 @@

This folder contains a Dockerfile and a distributed send/recv test.

**Build Image**

The default image name and tag is `kubeflow/xgboost-dist-rabit-test:1.2`.
You can build the image based on your requirement.

```shell
docker build -f Dockerfile -t kubeflow/xgboost-dist-rabit-test:1.2 ./
```

**Start and test XGBoost Rabit tracker**

Expand All @@ -23,15 +15,14 @@ kubectl create -f xgboostjob_v1alpha1_rabit_test.yaml
```
Here is sample output when the job is running. The output looks like this:
```
apiVersion: xgboostjob.kubeflow.org/v1alpha1
apiVersion: kubeflow.org/v1
kind: XGBoostJob
metadata:
creationTimestamp: "2019-06-21T03:32:57Z"
generation: 7
name: xgboost-dist-test
namespace: default
resourceVersion: "258466"
selfLink: /apis/xgboostjob.kubeflow.org/v1alpha1/namespaces/default/xgboostjobs/xgboost-dist-test
uid: 431dc182-93d5-11e9-bbab-080027dfbfe2
spec:
RunPolicy:
Expand All @@ -45,7 +36,7 @@ spec:
creationTimestamp: null
spec:
containers:
- image: docker.io/merlintang/xgboost-dist-rabit-test:1.2
- image: docker.io/kubeflow/xgboost-dist-rabit-test:latest
imagePullPolicy: Always
name: xgboostjob
ports:
Expand All @@ -60,7 +51,7 @@ spec:
creationTimestamp: null
spec:
containers:
- image: docker.io/merlintang/xgboost-dist-rabit-test:1.2
- image: docker.io/kubeflow/xgboost-dist-rabit-test:latest
imagePullPolicy: Always
name: xgboostjob
ports:
Expand Down
1 change: 1 addition & 0 deletions examples/xgboost/smoke-dist/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ Cython>=0.29.4
requests>=2.21.0
urllib3>=1.21.1
scipy>=1.4.1
xgboost==1.5.2
4 changes: 2 additions & 2 deletions examples/xgboost/smoke-dist/xgboostjob_v1_rabit_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ spec:
spec:
containers:
- name: xgboost
image: docker.io/merlintang/xgboost-dist-rabit-test:1.2
image: docker.io/kubeflow/xgboost-dist-rabit-test:latest
ports:
- containerPort: 9991
name: xgboostjob-port
Expand All @@ -23,7 +23,7 @@ spec:
spec:
containers:
- name: xgboost
image: docker.io/merlintang/xgboost-dist-rabit-test:1.2
image: docker.io/kubeflow/xgboost-dist-rabit-test:latest
ports:
- containerPort: 9991
name: xgboostjob-port
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ spec:
spec:
containers:
- name: xgboost
image: docker.io/merlintang/xgboost-dist-rabit-test:1.2
image: docker.io/kubeflow/xgboost-dist-rabit-test:latest
ports:
- containerPort: 9991
name: xgboostjob-port
Expand All @@ -27,7 +27,7 @@ spec:
spec:
containers:
- name: xgboost
image: docker.io/merlintang/xgboost-dist-rabit-test:1.2
image: docker.io/kubeflow/xgboost-dist-rabit-test:latest
ports:
- containerPort: 9991
name: xgboostjob-port
Expand Down
17 changes: 2 additions & 15 deletions examples/xgboost/xgboost-dist/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,25 +1,12 @@
# Install python 36.
# Install python 3.6.
FROM python:3.6

RUN apt-get update
RUN apt-get install -y git make g++ cmake

RUN mkdir -p /opt/mlkube

# Download the rabit tracker and xgboost code.

COPY requirements.txt /opt/mlkube/

# Install requirements

COPY requirements.txt /opt/mlkube/
RUN pip install -r /opt/mlkube/requirements.txt

# Build XGBoost.
RUN git clone --recursive https://github.com/dmlc/xgboost && \
cd xgboost && \
make -j$(nproc) && \
cd python-package; python setup.py install

COPY *.py /opt/mlkube/

ENTRYPOINT ["python", "/opt/mlkube/main.py"]
Loading