Skip to content
This repository has been archived by the owner on Feb 15, 2025. It is now read-only.

Commit

Permalink
chore: Remove model weights from container images (#786)
Browse files Browse the repository at this point in the history
* chore: add PVC to deployments to store model weights
* chore: add onCreate action to download model files
* chore: use Zarf Injection to populate PVC with model files
* chore: add zarf vars for pvc config
  • Loading branch information
YrrepNoj authored Aug 1, 2024
1 parent 08f1d10 commit 33e4efb
Show file tree
Hide file tree
Showing 39 changed files with 436 additions and 57 deletions.
9 changes: 9 additions & 0 deletions .github/actions/python/action.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
name: setup-python
description: "Setup Python and library dependencies"

inputs:
additionalOptionalDep:
description: "Additional optional dependencies to install"

runs:
using: composite
steps:
Expand All @@ -10,5 +14,10 @@ runs:
python-version-file: 'pyproject.toml'

- name: Install Python Deps
shell: bash
run: python -m pip install ".[dev]"

- name: Install additionalDeps
if: ${{ inputs.additionalOptionalDep != '' }}
shell: bash
run: python -m pip install ".[${{ inputs.additionalOptionalDep }}]"
7 changes: 2 additions & 5 deletions .github/workflows/e2e-vllm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,9 @@ jobs:
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

- name: Setup Python
uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c #v5.0.0
uses: ./.github/actions/python
with:
python-version-file: 'pyproject.toml'

- name: Install Python Deps
run: python -m pip install "."
additionalOptionalDep: dev-vllm

- name: Setup UDS Environment
uses: defenseunicorns/uds-common/.github/actions/setup@05f42bb3117b66ebef8c72ae050b34bce19385f5
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/e2e-whisper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ jobs:

- name: Setup Python
uses: ./.github/actions/python
with:
additionalOptionalDep: dev-whisper

- name: Setup UDS Cluster
uses: ./.github/actions/uds-cluster
Expand Down
14 changes: 0 additions & 14 deletions packages/llama-cpp-python/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,6 @@ ARG SDK_DEST=src/leapfrogai_sdk/build
USER root
WORKDIR /leapfrogai

# download model
RUN python -m pip install -U huggingface_hub[cli,hf_transfer]
ARG REPO_ID=TheBloke/SynthIA-7B-v2.0-GGUF
ARG FILENAME=synthia-7b-v2.0.Q4_K_M.gguf
ARG REVISION=3f65d882253d1f15a113dabf473a7c02a004d2b5

# NOTE: This is checking for a pre-downloaded model file in the local build dir before downloading the model from HuggingFace
# TODO: Add checksum validation to verify the model in the local build-dir is the model we expect
COPY packages/llama-cpp-python/scripts/model_download.py scripts/model_download.py
RUN REPO_ID=${REPO_ID} FILENAME=${FILENAME} REVISION=${REVISION} python3.11 scripts/model_download.py
RUN mv .model/*.gguf .model/model.gguf


# create virtual environment for light-weight portability and minimal libraries
RUN python3.11 -m venv .venv
ENV PATH="/leapfrogai/.venv/bin:$PATH"
Expand All @@ -42,7 +29,6 @@ ENV PATH="/leapfrogai/.venv/bin:$PATH"
WORKDIR /leapfrogai

COPY --from=builder /leapfrogai/.venv/ /leapfrogai/.venv/
COPY --from=builder /leapfrogai/.model/ /leapfrogai/.model/

COPY packages/llama-cpp-python/main.py .
COPY packages/llama-cpp-python/config.yaml .
Expand Down
1 change: 1 addition & 0 deletions packages/llama-cpp-python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ To build and deploy just the llama-cpp-python Zarf package (from the root of the
> Deploy a [UDS cluster](/README.md#uds) if one isn't deployed already
```shell
pip install 'huggingface_hub[cli,hf_transfer]' # NOTE(review): the download script now fetches the model with urllib from the standard library, so this install is likely no longer required — verify before removing
make build-llama-cpp-python LOCAL_VERSION=dev
uds zarf package deploy packages/llama-cpp-python/zarf-package-llama-cpp-python-*-dev.tar.zst --confirm
```
Expand Down
33 changes: 33 additions & 0 deletions packages/llama-cpp-python/chart/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,38 @@ spec:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
app: lfai-llama
{{- include "chart.selectorLabels" . | nindent 8 }}
spec:
# It's necessary to include the ###ZARF_DATA_INJECTION_MARKER### somewhere in the podspec, otherwise data injections will not occur.
initContainers:
  # Blocks pod startup until Zarf data injection has populated the model
  # volume. Zarf writes the timestamped ###ZARF_DATA_INJECTION_MARKER###
  # file last, after all model files are in place, so its presence means the
  # injection is complete.
  - name: data-loader
    image: cgr.dev/chainguard/bash:latest
    securityContext:
      runAsUser: 65532
      runAsGroup: 65532
      # NOTE(review): `fsGroup` removed here — it is a pod-level
      # PodSecurityContext field and is not valid on a container-level
      # securityContext; set it under the pod's securityContext instead.
    command:
      [
        "sh",
        "-c",
        'while [ ! -f /data/.model/###ZARF_DATA_INJECTION_MARKER### ]; do echo "waiting for zarf data sync" && sleep 1; done; echo "we are done waiting!"',
      ]
    resources:
      requests:
        memory: "64Mi"
        cpu: "200m"
      limits:
        memory: "128Mi"
        cpu: "500m"
    volumeMounts:
      - name: leapfrogai-pv-storage
        mountPath: /data
volumes:
  # Backing storage for the injected model weights; claimed from the PVC
  # defined in chart/templates/pvc.yaml.
  - name: leapfrogai-pv-storage
    persistentVolumeClaim:
      claimName: lfai-llama-pv-claim
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
containers:
Expand All @@ -39,6 +69,9 @@ spec:
protocol: TCP
resources:
{{- toYaml .Values.resources | nindent 12 }}
volumeMounts:
- name: leapfrogai-pv-storage
mountPath: "/data"
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
Expand Down
14 changes: 14 additions & 0 deletions packages/llama-cpp-python/chart/templates/pvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# PersistentVolumeClaim that stores the llama-cpp-python model weights.
# The volume is populated at deploy time via Zarf data injection (see the
# data-loader initContainer in deployment.yaml), keeping the weights out of
# the container image.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: lfai-llama-pv-claim
  namespace: leapfrogai
spec:
  # Only set storageClassName when explicitly configured; otherwise the
  # cluster's default storage class is used.
  {{- if .Values.persistence.storageClass }}
  storageClassName: {{ .Values.persistence.storageClass }}
  {{- end }}
  accessModes:
    - {{ .Values.persistence.accessModes | quote }}
  resources:
    requests:
      storage: {{ .Values.persistence.size | quote }}
5 changes: 5 additions & 0 deletions packages/llama-cpp-python/chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,8 @@ nodeSelector: {}
tolerations: []

affinity: {}

persistence:
size: 15Gi
accessModes: ReadWriteOnce
storageClass: "local-path"
2 changes: 1 addition & 1 deletion packages/llama-cpp-python/config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
model:
source: ".model/model.gguf"
source: "/data/.model/model.gguf"
max_context_length: 16384
stop_tokens:
- "<|im_end|>"
Expand Down
5 changes: 5 additions & 0 deletions packages/llama-cpp-python/llama-cpp-python-values.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
# Deploy-time Helm values for the llama-cpp-python chart; the ###ZARF_*###
# tokens are replaced by Zarf before the chart is rendered.
image:
  tag: "###ZARF_CONST_IMAGE_VERSION###"

persistence:
  # Quoted so the substituted values are always parsed as YAML strings —
  # an unquoted empty substitution would otherwise become null, and
  # number-like sizes could be mistyped.
  size: "###ZARF_VAR_PVC_SIZE###"
  accessModes: "###ZARF_VAR_PVC_ACCESS_MODE###"
  storageClass: "###ZARF_VAR_PVC_STORAGE_CLASS###"
3 changes: 3 additions & 0 deletions packages/llama-cpp-python/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
class Model:
backend_config = BackendConfig()

if not os.path.exists(backend_config.model.source):
raise ValueError(f"Model path ({backend_config.model.source}) does not exist")

llm = Llama(
model_path=backend_config.model.source,
n_ctx=backend_config.max_context_length,
Expand Down
41 changes: 29 additions & 12 deletions packages/llama-cpp-python/scripts/model_download.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,34 @@
import os
import hashlib
import urllib.request

from huggingface_hub import hf_hub_download
REPO_ID = os.environ.get("REPO_ID", "")
FILENAME = os.environ.get("FILENAME", "")
REVISION = os.environ.get("REVISION", "main")
CHECKSUM = os.environ.get("SHA256_CHECKSUM", "")
OUTPUT_FILE = os.environ.get("OUTPUT_FILE", ".model/model.gguf")

REPO_ID = os.environ.get("REPO_ID", "TheBloke/SynthIA-7B-v2.0-GGUF")
FILENAME = os.environ.get("FILENAME", "synthia-7b-v2.0.Q4_K_M.gguf")
REVISION = os.environ.get("REVISION", "3f65d882253d1f15a113dabf473a7c02a004d2b5")

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
def download_model():
# Check if the model is already downloaded.
if os.path.exists(OUTPUT_FILE) and CHECKSUM != "":
if hashlib.sha256(open(OUTPUT_FILE, "rb").read()).hexdigest() == CHECKSUM:
print("Model already downloaded.")
return

hf_hub_download(
repo_id=REPO_ID,
filename=FILENAME,
local_dir=".model",
local_dir_use_symlinks=False,
revision=REVISION,
)
# Validate that require environment variables are provided
if REPO_ID == "" or FILENAME == "":
print("Please provide REPO_ID and FILENAME environment variables.")
return

# Download the model!
print("Downloading model... This may take a while.")
if not os.path.exists(".model"):
os.mkdir(".model")
urllib.request.urlretrieve(
f"https://huggingface.co/{REPO_ID}/resolve/{REVISION}/{FILENAME}", OUTPUT_FILE
)


if __name__ == "__main__":
download_model()
32 changes: 32 additions & 0 deletions packages/llama-cpp-python/zarf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,19 @@ constants:
- name: IMAGE_VERSION
value: "###ZARF_PKG_TMPL_IMAGE_VERSION###"

variables:
- name: PVC_SIZE
description: Size of the PVC used for model storage.
default: "15Gi"
pattern: "^[0-9]+[a-zA-Z]+$"
- name: PVC_ACCESS_MODE
description: Access mode of the PVC used for model storage.
default: "ReadWriteOnce"
pattern: "^(ReadWriteOnce|ReadOnlyMany|ReadWriteMany)$"
- name: PVC_STORAGE_CLASS
description: Storage class of the PVC used for model storage.
default: "local-path"

components:
- name: llama-cpp-python-model
required: true
Expand All @@ -26,3 +39,22 @@ components:
- "llama-cpp-python-values.yaml"
images:
- ghcr.io/defenseunicorns/leapfrogai/llama-cpp-python:###ZARF_PKG_TMPL_IMAGE_VERSION###
- cgr.dev/chainguard/bash:latest
dataInjections:
- source: .model/
target:
namespace: leapfrogai
selector: app=lfai-llama
container: data-loader
path: /data/.model
compress: true
actions:
onCreate:
before:
# NOTE: This assumes python is installed and in $PATH; the download script uses only the Python standard library (urllib), so huggingface_hub is no longer required
- cmd: python scripts/model_download.py
env:
- REPO_ID=TheBloke/SynthIA-7B-v2.0-GGUF
- FILENAME=synthia-7b-v2.0.Q4_K_M.gguf
- REVISION=3f65d882253d1f15a113dabf473a7c02a004d2b5
- SHA256_CHECKSUM=5d6369d456446c40a9fd149525747d8dc494196686861c43b00f9230a166ba82
10 changes: 0 additions & 10 deletions packages/text-embeddings/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,13 @@ ENV PATH="/leapfrogai/.venv/bin:$PATH"
# copy and install all python dependencies
# NOTE: We are copying the leapfrog whl to this filename because installing 'optional extras' from
# a wheel requires the absolute path to the wheel file (instead of a wildcard whl)

COPY --from=sdk /leapfrogai/${SDK_DEST} ${SDK_DEST}
COPY packages/text-embeddings packages/text-embeddings

RUN rm -f packages/text-embeddings/build/*.whl
RUN python -m pip wheel packages/text-embeddings -w packages/text-embeddings/build --find-links=${SDK_DEST}
RUN pip install packages/text-embeddings/build/lfai_text_embeddings*.whl --no-index --find-links=packages/text-embeddings/build/


# download model
RUN python -m pip install -U huggingface_hub[cli,hf_transfer]
ARG REPO_ID="hkunlp/instructor-xl"
ARG REVISION="ce48b213095e647a6c3536364b9fa00daf57f436"
COPY packages/text-embeddings/scripts/model_download.py scripts/model_download.py
RUN REPO_ID=${REPO_ID} REVISION=${REVISION} python scripts/model_download.py

# hardened and slim python image
FROM ghcr.io/defenseunicorns/leapfrogai/python:3.11

Expand All @@ -38,7 +29,6 @@ ENV PATH="/leapfrogai/.venv/bin:$PATH"
WORKDIR /leapfrogai

COPY --from=builder /leapfrogai/.venv/ /leapfrogai/.venv/
COPY --from=builder /leapfrogai/.model/ /leapfrogai/.model/

COPY packages/text-embeddings/main.py .

Expand Down
1 change: 1 addition & 0 deletions packages/text-embeddings/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ To build and deploy just the text-embeddings Zarf package (from the root of the
> Deploy a [UDS cluster](/README.md#uds) if one isn't deployed already
```shell
pip install 'huggingface_hub[cli,hf_transfer]' # Used to download the model weights from huggingface
make build-text-embeddings LOCAL_VERSION=dev
uds zarf package deploy packages/text-embeddings/zarf-package-text-embeddings-*-dev.tar.zst --confirm
```
Expand Down
36 changes: 36 additions & 0 deletions packages/text-embeddings/chart/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,43 @@ spec:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
app: lfai-text-embeddings
{{- include "chart.selectorLabels" . | nindent 8 }}
spec:
{{- if gt (index .Values.resources.limits "nvidia.com/gpu") 0.0 }}
runtimeClassName: nvidia
{{- else if .Values.gpu.runtimeClassName }}
runtimeClassName: {{ .Values.gpu.runtimeClassName }}
{{- end }}
# It's necessary to include the ###ZARF_DATA_INJECTION_MARKER### somewhere in the podspec, otherwise data injections will not occur.
initContainers:
  # Blocks pod startup until Zarf data injection has populated the model
  # volume. Zarf writes the timestamped ###ZARF_DATA_INJECTION_MARKER###
  # file last, after all model files are in place, so its presence means the
  # injection is complete.
  - name: data-loader
    image: cgr.dev/chainguard/bash:latest
    securityContext:
      runAsUser: 65532
      runAsGroup: 65532
      # NOTE(review): `fsGroup` removed here — it is a pod-level
      # PodSecurityContext field and is not valid on a container-level
      # securityContext; set it under the pod's securityContext instead.
    command:
      [
        "sh",
        "-c",
        'while [ ! -f /data/.model/###ZARF_DATA_INJECTION_MARKER### ]; do echo "waiting for zarf data sync" && sleep 1; done; echo "we are done waiting!"',
      ]
    resources:
      requests:
        memory: "64Mi"
        cpu: "200m"
      limits:
        memory: "128Mi"
        cpu: "500m"
    volumeMounts:
      - name: leapfrogai-pv-storage
        mountPath: /data
volumes:
  # Backing storage for the injected model weights; claimed from the PVC
  # defined in chart/templates/pvc.yaml.
  - name: leapfrogai-pv-storage
    persistentVolumeClaim:
      claimName: lfai-text-embeddings-pv-claim
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
containers:
Expand All @@ -44,6 +74,12 @@ spec:
protocol: TCP
resources:
{{- toYaml .Values.resources | nindent 12 }}
volumeMounts:
- name: leapfrogai-pv-storage
mountPath: "/data"
env:
- name: LFAI_MODEL_PATH
value: '/data/.model'
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
Expand Down
14 changes: 14 additions & 0 deletions packages/text-embeddings/chart/templates/pvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# PersistentVolumeClaim that stores the text-embeddings model weights.
# The volume is populated at deploy time via Zarf data injection (see the
# data-loader initContainer in deployment.yaml), keeping the weights out of
# the container image.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: lfai-text-embeddings-pv-claim
  namespace: leapfrogai
spec:
  # Only set storageClassName when explicitly configured; otherwise the
  # cluster's default storage class is used.
  {{- if .Values.persistence.storageClass }}
  storageClassName: {{ .Values.persistence.storageClass }}
  {{- end }}
  accessModes:
    - {{ .Values.persistence.accessModes | quote }}
  resources:
    requests:
      storage: {{ .Values.persistence.size | quote }}
5 changes: 5 additions & 0 deletions packages/text-embeddings/chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,8 @@ nodeSelector: {}
tolerations: []

affinity: {}

persistence:
size: 15Gi
accessModes: ReadWriteOnce
storageClass: "local-path"
5 changes: 5 additions & 0 deletions packages/text-embeddings/embedding-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,8 @@ gpu:
resources:
limits:
nvidia.com/gpu: ###ZARF_VAR_GPU_LIMIT###

persistence:
  # Quoted so the Zarf-substituted values are always parsed as YAML strings —
  # an unquoted empty substitution would otherwise become null, and
  # number-like sizes could be mistyped.
  size: "###ZARF_VAR_PVC_SIZE###"
  accessModes: "###ZARF_VAR_PVC_ACCESS_MODE###"
  storageClass: "###ZARF_VAR_PVC_STORAGE_CLASS###"
Loading

0 comments on commit 33e4efb

Please sign in to comment.