Skip to content

Commit

Permalink
Add Neuronx TGI image 0.0.27 (#110)
Browse files Browse the repository at this point in the history
* feat(optimum-neuron): add v0.0.27 recipe

* chore: release Neuronx TGI 0.0.27
  • Loading branch information
dacorvo authored Dec 17, 2024
1 parent 9dee6c1 commit 6413521
Show file tree
Hide file tree
Showing 2 changed files with 198 additions and 7 deletions.
192 changes: 192 additions & 0 deletions huggingface/pytorch/optimum/docker/0.0.27/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
# Fetch and extract the TGI sources.
# The base image is pinned to a release tag instead of the implicit `latest`
# so that rebuilding this stage is reproducible.
FROM alpine:3.20 AS tgi
# ADD creates the missing /tgi directory on its own, so no prior mkdir is needed.
# NOTE(review): consider `ADD --checksum=sha256:...` to verify the downloaded tarball.
ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v3.0.0.tar.gz /tgi/sources.tar.gz
# Unpack into /tgi, dropping the archive's single top-level directory
RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1

# Fetch also the optimum-neuron sources that contain modified TGI sources.
# The base image is pinned to a release tag instead of the implicit `latest`
# so that rebuilding this stage is reproducible.
FROM alpine:3.20 AS optimum-neuron
# ADD creates the missing /optimum-neuron directory on its own, so no prior mkdir is needed.
# NOTE(review): consider `ADD --checksum=sha256:...` to verify the downloaded tarball.
ADD https://github.com/huggingface/optimum-neuron/archive/refs/tags/v0.0.27.tar.gz /optimum-neuron/sources.tar.gz
# Unpack into /optimum-neuron, dropping the archive's single top-level directory
RUN tar -C /optimum-neuron -xf /optimum-neuron/sources.tar.gz --strip-components=1

# Build cargo components (adapted from TGI original Dockerfile)
# Note: we cannot use the cargo-chef base image as it uses python 3.11
FROM ubuntu:22.04 AS chef

# curl and ca-certificates are needed for the rustup download below;
# build-essential is presumably required to compile and link the crates.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Install the pinned Rust toolchain.
# The installer is downloaded to a file rather than piped into `sh`: with the
# default shell a pipeline only reports the status of its last command, so a
# failed download would otherwise go unnoticed.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh \
    && sh /tmp/rustup-init.sh --default-toolchain 1.80.1 --profile minimal -y \
    && rm -f /tmp/rustup-init.sh
ENV PATH="/root/.cargo/bin:${PATH}"
RUN cargo install cargo-chef --locked

WORKDIR /usr/src

ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse

# Compute the cargo-chef recipe (dependency plan) for the workspace
FROM chef AS planner
# The workspace manifest comes from the optimum-neuron sources fetched in the
# `optimum-neuron` stage, not from the build context, hence the explicit --from.
COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/Cargo.toml Cargo.toml
# Everything else is taken from the upstream TGI sources
COPY --from=tgi /tgi/Cargo.lock Cargo.lock
COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml
COPY --from=tgi /tgi/proto proto
COPY --from=tgi /tgi/router router
COPY --from=tgi /tgi/backends backends
COPY --from=tgi /tgi/launcher launcher
RUN cargo chef prepare --recipe-path recipe.json

# Build the release binaries of the cargo workspace
FROM chef AS builder

# Extra build-time packages (unzip is used for the protoc archive below)
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        unzip python3-dev libssl-dev pkg-config \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Install the protoc binary and its bundled include files under /usr/local
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
    rm -f $PROTOC_ZIP

# Cook the dependencies from the planner recipe, in a layer of their own.
# The workspace manifest lives in the `optimum-neuron` stage, not in the build
# context, hence the explicit --from.
COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/Cargo.toml Cargo.toml
COPY --from=planner /usr/src/recipe.json recipe.json
RUN cargo chef cook --release --recipe-path recipe.json

# Bring in the actual sources and build them
COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/Cargo.toml Cargo.toml
COPY --from=tgi /tgi/Cargo.lock Cargo.lock
COPY --from=tgi /tgi/rust-toolchain.toml rust-toolchain.toml
COPY --from=tgi /tgi/proto proto
COPY --from=tgi /tgi/router router
COPY --from=tgi /tgi/backends backends
COPY --from=tgi /tgi/launcher launcher
# Remove this line once TGI has fixed the conflict
RUN cargo update ureq --precise 2.9.7
RUN cargo build --release

# Python base image
FROM ubuntu:22.04 AS base

# System Python tooling inherited by the stages built on top of this one
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
        python-is-python3 \
        python3-pip \
        python3-setuptools && \
    rm -rf /var/lib/apt/lists/* && \
    apt-get clean
# Upgrade pip itself without keeping a download cache in the layer
RUN pip3 install --no-cache-dir --upgrade pip

# Python server build image
FROM base AS pyserver

# Tools needed by the server build
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
        make \
        python3-venv && \
    rm -rf /var/lib/apt/lists/* && \
    apt-get clean

# WORKDIR creates /pyserver when it does not exist yet
WORKDIR /pyserver
# Protobuf definitions come from upstream TGI, the server code from optimum-neuron
COPY --from=tgi /tgi/proto proto
COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/server server
RUN pip3 install -r server/build-requirements.txt
# Run the gen-server target; the build output is directed to /pyserver/build
RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto make -C server gen-server

# Neuron base image (used for deployment)
FROM base AS neuron

# Install system prerequisites
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        gnupg2 \
        wget \
        python3-dev \
        libexpat1 \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# Register the AWS Neuron apt repository and its signing key
RUN echo "deb https://apt.repos.neuron.amazonaws.com jammy main" > /etc/apt/sources.list.d/neuron.list
RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -

# Install neuronx packages
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        aws-neuronx-dkms=2.18.20.0 \
        aws-neuronx-collectives=2.22.33.0-d2128d1aa \
        aws-neuronx-runtime-lib=2.22.19.0-5856c0b42 \
        aws-neuronx-tools=2.19.0.0 \
        libxml2 \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

ENV PATH="/opt/bin/:/opt/aws/neuron/bin:${PATH}"

# Install the pinned Neuron python packages from the Neuron pip repository.
# --no-cache-dir keeps pip's download cache out of the deployment image.
RUN pip3 install --no-cache-dir \
        neuronx-cc==2.15.143.0 \
        torch-neuronx==2.1.2.2.3.2 \
        transformers-neuronx==0.12.313 \
        libneuronxla==2.0.5347.0 \
        --extra-index-url=https://pip.repos.neuron.amazonaws.com

# Install HuggingFace packages
RUN pip3 install --no-cache-dir \
        hf_transfer huggingface_hub

# Install optimum-neuron
COPY --from=optimum-neuron /optimum-neuron optimum-neuron
RUN pip3 install --no-cache-dir ./optimum-neuron

# TGI base env
ENV HUGGINGFACE_HUB_CACHE=/tmp \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PORT=80

# Disable color logs as they are not supported by CloudWatch
ENV LOGURU_COLORIZE=NO
ENV LOG_COLORIZE=0

# Install router
COPY --from=builder /usr/src/target/release/text-generation-router-v2 /usr/local/bin/text-generation-router
# Install launcher
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
# Install python server
COPY --from=pyserver /pyserver/build/dist dist
RUN pip install --no-cache-dir dist/text_generation_server*.tar.gz

# AWS Sagemaker compatible image
FROM neuron AS sagemaker

# The entrypoint script is copied into the current working directory and made
# executable; ENTRYPOINT refers to it with a path relative to that directory.
COPY --from=optimum-neuron /optimum-neuron/text-generation-inference/sagemaker-entrypoint.sh entrypoint.sh
RUN chmod +x entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]


# Tools required to fetch and unpack the OSS compliance bundle below
RUN apt-get update && apt-get install -y --no-install-recommends curl unzip \
    && rm -rf /var/lib/apt/lists/*
# Download the OSS compliance bundle, install its test script, run the
# generation script, then remove the bundle.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error page as the zip; --no-cache-dir keeps pip's cache out of the image.
RUN HOME_DIR=/root && \
    pip install --no-cache-dir requests && \
    curl --fail -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \
    unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ && \
    cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance && \
    chmod +x /usr/local/bin/testOSSCompliance && \
    chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh && \
    ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python && \
    rm -rf ${HOME_DIR}/oss_compliance*

# Write the third-party license notice. The continuation lines below are part
# of a quoted string and must stay unindented.
RUN echo "N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image \
has an indirect documentation dependency on third party <docutils/tools/editors/emacs/rst.el> project. The \
<docutils/tools/editors/emacs/rst.el> project's licensing includes the <GPL v3> license. \
\n\n\
N.B.: Although this image is released under the Apache-2.0 License, the Dockerfile used to build the image uses the \
third party <Text Generation Inference (TGI)> project. The <Text Generation Inference (TGI)> project's licensing \
includes the <HFOIL --> https://github.com/huggingface/text-generation-inference/blob/main/LICENSE> \
license." > /root/THIRD_PARTY_LICENSES

LABEL dlc_major_version="1"
LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true"
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true"
13 changes: 6 additions & 7 deletions releases.json
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
{
"device": "inf2",
"min_version": "0.0.22",
"max_version": "0.0.25",
"max_version": "0.0.27",
"os_version": "ubuntu22.04",
"python_version": "py310",
"pytorch_version": "2.1.2"
Expand Down Expand Up @@ -90,12 +90,11 @@
"releases": [
{
"framework": "TGI",
"device": "gpu",
"version": "2.4.0",
"device": "inf2",
"version": "0.0.27",
"os_version": "ubuntu22.04",
"python_version": "py311",
"pytorch_version": "2.4.0",
"cuda_version": "cu124"
"python_version": "py310",
"pytorch_version": "2.1.2"
}
]
}
}

0 comments on commit 6413521

Please sign in to comment.