From c8d98405a8f7b0e5d07391b671dcc61bb9d7bad5 Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Fri, 23 Feb 2024 21:37:08 +0800
Subject: [PATCH] Use torch 2.2 for daily CI (model tests) (#29208)

* Use torch 2.2 for daily CI (model tests)

* update

* update

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
---
 .github/workflows/build-docker-images.yml     | 12 +------
 docker/transformers-all-latest-gpu/Dockerfile | 33 +++++++------------
 2 files changed, 12 insertions(+), 33 deletions(-)

diff --git a/.github/workflows/build-docker-images.yml b/.github/workflows/build-docker-images.yml
index be070a95d3a9..2b198bd4af56 100644
--- a/.github/workflows/build-docker-images.yml
+++ b/.github/workflows/build-docker-images.yml
@@ -20,18 +20,8 @@ concurrency:
 jobs:
   latest-docker:
     name: "Latest PyTorch + TensorFlow [dev]"
-    runs-on: ubuntu-22.04
+    runs-on: [intel-cpu, 8-cpu, ci]
     steps:
-      - name: Cleanup disk
-        run: |
-          sudo ls -l /usr/local/lib/
-          sudo ls -l /usr/share/
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /usr/share/dotnet
-          sudo du -sh /usr/local/lib/
-          sudo du -sh /usr/share/
       -
         name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile
index e96eb9539c8b..9afac41d5b04 100644
--- a/docker/transformers-all-latest-gpu/Dockerfile
+++ b/docker/transformers-all-latest-gpu/Dockerfile
@@ -9,9 +9,9 @@ SHELL ["sh", "-lc"]
 # The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
 # to be used as arguments for docker build (so far).
 
-ARG PYTORCH='2.1.1'
+ARG PYTORCH='2.2.0'
 # (not always a valid torch version)
-ARG INTEL_TORCH_EXT='2.1.100'
+ARG INTEL_TORCH_EXT='2.2.0'
 # Example: `cu102`, `cu113`, etc.
 ARG CUDA='cu118'
 
@@ -23,6 +23,14 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip
 ARG REF=main
 RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF
 
+# During the switch to torch 2.2, we need to move the (explicit) torch installation below but keep the tf installation here.
+# (otherwise we get `The runner has received a shutdown signal.`, whose root cause is unknown but is likely the disk being full)
+RUN python3 -m pip install --no-cache-dir -U tensorflow==2.13 protobuf==3.20.3 tensorflow_text tensorflow_probability
+
+RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime]
+
+# RUN python3 -m pip uninstall -y torch torchvision torchaudio && python3 -m pip install --no-cache-dir -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
+
 # TODO: Handle these in a python utility script
 RUN [ ${#PYTORCH} -gt 0 -a "$PYTORCH" != "pre" ] && VERSION='torch=='$PYTORCH'.*' ||  VERSION='torch'; echo "export VERSION='$VERSION'" >> ~/.profile
 RUN echo torch=$VERSION
@@ -31,10 +39,6 @@ RUN echo torch=$VERSION
 # TODO: We might need to specify proper versions that work with a specific torch version (especially for past CI).
 RUN [ "$PYTORCH" != "pre" ] && python3 -m pip install --no-cache-dir -U $VERSION torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA || python3 -m pip install --no-cache-dir -U --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/$CUDA
 
-RUN python3 -m pip install --no-cache-dir -U tensorflow==2.13 protobuf==3.20.3 tensorflow_text tensorflow_probability
-
-RUN python3 -m pip install --no-cache-dir -e ./transformers[dev,onnxruntime]
-
 RUN python3 -m pip uninstall -y flax jax
 
 RUN python3 -m pip install --no-cache-dir intel_extension_for_pytorch==$INTEL_TORCH_EXT -f https://developer.intel.com/ipex-whl-stable-cpu
@@ -46,22 +50,7 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/acc
 
 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/peft@main#egg=peft
 
-# Add bitsandbytes for mixed int8 testing
-RUN python3 -m pip install --no-cache-dir bitsandbytes
-
-# Add auto-gptq for gtpq quantization testing
-RUN python3 -m pip install --no-cache-dir auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
-
-# Add einops for additional model testing
-RUN python3 -m pip install --no-cache-dir einops
-
-# Add aqlm for quantization testing
-RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.1
-
-# Add autoawq for quantization testing
-RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.1.8/autoawq-0.1.8+cu118-cp38-cp38-linux_x86_64.whl
-
-# For bettertransformer + gptq
+# For bettertransformer
 RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/optimum@main#egg=optimum
 
 # For video model testing