From c8d98405a8f7b0e5d07391b671dcc61bb9d7bad5 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Fri, 23 Feb 2024 21:37:08 +0800 Subject: [PATCH] Use torch 2.2 for daily CI (model tests) (#29208) * Use torch 2.2 for daily CI (model tests) * update * update --------- Co-authored-by: ydshieh --- .github/workflows/build-docker-images.yml | 12 +------ docker/transformers-all-latest-gpu/Dockerfile | 33 +++++++------------ 2 files changed, 12 insertions(+), 33 deletions(-) diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml index be070a95d3a9..2b198bd4af56 100644 --- a/.github/workflows/build-docker-images.yml +++ b/.github/workflows/build-docker-images.yml @@ -20,18 +20,8 @@ concurrency: jobs: latest-docker: name: "Latest PyTorch + TensorFlow [dev]" - runs-on: ubuntu-22.04 + runs-on: [intel-cpu, 8-cpu, ci] steps: - - name: Cleanup disk - run: | - sudo ls -l /usr/local/lib/ - sudo ls -l /usr/share/ - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - sudo rm -rf /usr/local/lib/android - sudo rm -rf /usr/share/dotnet - sudo du -sh /usr/local/lib/ - sudo du -sh /usr/share/ - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile index e96eb9539c8b..9afac41d5b04 100644 --- a/docker/transformers-all-latest-gpu/Dockerfile +++ b/docker/transformers-all-latest-gpu/Dockerfile @@ -9,9 +9,9 @@ SHELL ["sh", "-lc"] # The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant # to be used as arguments for docker build (so far). -ARG PYTORCH='2.1.1' +ARG PYTORCH='2.2.0' # (not always a valid torch version) -ARG INTEL_TORCH_EXT='2.1.100' +ARG INTEL_TORCH_EXT='2.2.0' # Example: `cu102`, `cu113`, etc. 
ARG CUDA='cu118' @@ -23,6 +23,14 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip ARG REF=main RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF +# During the switch to torch 2.2, we need to move the (explicit) torch installation below but keep the tf installation here. +# (otherwise we get `The runner has received a shutdown signal.`, whose root cause is unknown but is likely the disk being full) +RUN python3 -m pip install --no-cache-dir -U tensorflow==2.13 protobuf==3.20.3 tensorflow_text tensorflow_probability + +RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] + +# RUN python3 -m pip uninstall -y torch torchvision torchaudio && python3 -m pip install --no-cache-dir -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 + # TODO: Handle these in a python utility script RUN [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' || VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile RUN echo torch=$VERSION @@ -31,10 +39,6 @@ RUN echo torch=$VERSION # TODO: We might need to specify proper versions that work with a specific torch version (especially for past CI). 
RUN [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA -RUN python3 -m pip install --no-cache-dir -U tensorflow==2.13 protobuf==3.20.3 tensorflow_text tensorflow_probability - -RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime] - RUN python3 -m pip uninstall -y flax jax RUN python3 -m pip install --no-cache-dir intel_extension_for_pytorch==$INTEL_TORCH_EXT -f https://developer.intel.com/ipex-whl-stable-cpu @@ -46,22 +50,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/acc RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/peft@main#egg=peft -# Add bitsandbytes for mixed int8 testing -RUN python3 -m pip install --no-cache-dir bitsandbytes - -# Add auto-gptq for gtpq quantization testing -RUN python3 -m pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ - -# Add einops for additional model testing -RUN python3 -m pip install --no-cache-dir einops - -# Add aqlm for quantization testing -RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.1 - -# Add autoawq for quantization testing -RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.1.8/autoawq-0.1.8+cu118-cp38-cp38-linux_x86_64.whl - -# For bettertransformer + gptq +# For bettertransformer RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum # For video model testing