Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added modelcar #149

Closed
wants to merge 10 commits into from
2 changes: 2 additions & 0 deletions bootstrap/ic-shared-llm/deployment-hftgi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ metadata:
namespace: ic-shared-llm
labels:
app: llm-flant5
annotations:
argocd.argoproj.io/sync-wave: "2"
spec:
replicas: 1
selector:
Expand Down
111 changes: 0 additions & 111 deletions bootstrap/ic-shared-llm/deployment.yaml

This file was deleted.

2 changes: 2 additions & 0 deletions bootstrap/ic-shared-llm/fix-odf-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ apiVersion: v1
metadata:
name: rook-ceph-operator-config
namespace: openshift-storage
annotations:
argocd.argoproj.io/sync-wave: "0"
data:
CSI_PLUGIN_TOLERATIONS: |
- key: nvidia.com/gpu
Expand Down
38 changes: 38 additions & 0 deletions bootstrap/ic-shared-llm/inference-service-granite-modelcar.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# KServe InferenceService for granite-7b-instruct, served by the `vllm`
# runtime with the model weights pulled from an OCI image (oci:// storageUri).
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: granite-7b-instruct
  namespace: ic-shared-llm
  labels:
    opendatahub.io/dashboard: "true"
  annotations:
    openshift.io/display-name: granite-7b-instruct
    serving.knative.openshift.io/enablePassthrough: "true"
    sidecar.istio.io/inject: "true"
    sidecar.istio.io/rewriteAppHTTPProbers: "true"
    # Argo CD ordering and diff/prune behaviour for this resource.
    argocd.argoproj.io/sync-wave: "2"
    argocd.argoproj.io/compare-options: IgnoreExtraneous
    argocd.argoproj.io/sync-options: Prune=false
spec:
  predictor:
    # Fixed at exactly one replica.
    minReplicas: 1
    maxReplicas: 1
    model:
      name: ""
      runtime: vllm
      modelFormat:
        name: vLLM
      storageUri: oci://quay.io/rh-aiservices-bu/granite-7b-instruct-modelcar:0.1
      resources:
        requests:
          cpu: "1"
          memory: 8Gi
          nvidia.com/gpu: "1"
        limits:
          cpu: "6"
          memory: 24Gi
          nvidia.com/gpu: "1"
    # Tolerate the NoSchedule taint keyed nvidia.com/gpu.
    tolerations:
      - key: nvidia.com/gpu
        operator: Exists
        effect: NoSchedule
49 changes: 49 additions & 0 deletions bootstrap/ic-shared-llm/job-enable-modelcar.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Argo CD Sync hook (wave 1) that flips "enableModelcar" to true in the
# storageInitializer entry of the inferenceservice-config ConfigMap
# (namespace redhat-ods-applications) and then restarts the KServe controller.
# Runs as the modelcar-enable-sa ServiceAccount.
apiVersion: batch/v1
kind: Job
metadata:
  name: patch-inferenceservice-config
  namespace: ic-shared-llm
  annotations:
    argocd.argoproj.io/sync-wave: "1"
    argocd.argoproj.io/hook: Sync
    argocd.argoproj.io/hook-delete-policy: HookSucceeded
spec:
  backoffLimit: 4
  template:
    spec:
      # Only serviceAccountName is set; `serviceAccount` is a deprecated alias
      # of the same field and was redundant.
      serviceAccountName: modelcar-enable-sa
      containers:
        - name: patch-configmap
          image: registry.redhat.io/openshift4/ose-cli:v4.15.0
          command: ["/bin/sh", "-c"]
          args:
            - |
              # Abort on any failed command or unset variable so the Job is
              # retried (restartPolicy/backoffLimit) instead of reporting a
              # false success, e.g. when the ConfigMap cannot be read.
              set -eu

              # Wait for the default-dsci resource to report phase "Ready".
              # A failing `oc get` inside the loop condition just yields an
              # empty string and another retry; it does not trip `set -e`.
              echo "Waiting for the operator to be Ready..."
              until [ "$(oc get dsci -n redhat-ods-applications default-dsci -o jsonpath='{.status.phase}')" = "Ready" ]; do
                echo "Operator not ready, retrying in 10s..."
                sleep 10
              done
              echo "Operator is Ready!"

              # Fetch current storageInitializer config
              config=$(oc get configmap inferenceservice-config -n redhat-ods-applications -o jsonpath='{.data.storageInitializer}')

              # Decide based on the current value of "enableModelcar".
              # The match tolerates any whitespace after the colon, and -q keeps
              # the matched config out of the Job log.
              if echo "$config" | grep -q '"enableModelcar":[[:space:]]*false'; then
                echo "Patching configmap to enable modelcar..."

                # Modify the config to enable modelcar using sed
                newValue=$(echo "$config" | sed 's/"enableModelcar":[[:space:]]*false/"enableModelcar": true/')
                # Escape double quotes so the value can be embedded in the JSON patch.
                # NOTE(review): only '"' is escaped; backslashes in the config
                # would not survive — confirm the config never contains any.
                newValueEscaped=$(echo "$newValue" | sed 's/\"/\\\"/g')

                # Patch the configmap with the new value
                oc patch configmap inferenceservice-config -n redhat-ods-applications --type='json' -p "[{\"op\": \"replace\", \"path\": \"/data/storageInitializer\", \"value\": \"$newValueEscaped\"}]"
              elif echo "$config" | grep -q '"enableModelcar":[[:space:]]*true'; then
                echo "Modelcar is already enabled, no patching needed."
              else
                # Key absent or in an unexpected form: say so rather than
                # claiming it is already enabled. The ConfigMap is left untouched.
                echo "WARNING: \"enableModelcar\" not found in storageInitializer config; configmap left unchanged." >&2
              fi

              # Restart the KServe controller to apply changes
              oc delete pod -n redhat-ods-applications -l control-plane=kserve-controller-manager
      restartPolicy: OnFailure
12 changes: 6 additions & 6 deletions bootstrap/ic-shared-llm/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ resources:
# wave 0
- namespace.yaml
- fix-odf-config.yaml
- token.yaml
- rbac-job-enable-modelcar.yaml
# wave 1
- pvc.yaml
- job-enable-modelcar.yaml
# wave 2
- service-runtime-vllm-granite-modelcar.yaml
- pvc-hftgi.yaml
- deployment.yaml
- service.yaml
- inference-service-granite-modelcar.yaml
- deployment-hftgi.yaml
- service-hftgi.yaml
# wave 2
- service-hftgi.yaml
2 changes: 1 addition & 1 deletion bootstrap/ic-shared-llm/pvc-hftgi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ metadata:
labels:
app: ic-shared-llm
annotations:
argocd.argoproj.io/sync-wave: "0"
argocd.argoproj.io/sync-wave: "2"
spec:
accessModes:
- ReadWriteMany
Expand Down
18 changes: 0 additions & 18 deletions bootstrap/ic-shared-llm/pvc.yaml

This file was deleted.

68 changes: 68 additions & 0 deletions bootstrap/ic-shared-llm/rbac-job-enable-modelcar.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
---
# ServiceAccount used by the patch-inferenceservice-config Job.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: ic-shared-llm
  name: modelcar-enable-sa
  annotations:
    # Created in wave 0.
    argocd.argoproj.io/sync-wave: "0"
---
# Namespaced permissions in redhat-ods-applications for the modelcar patch Job:
# read/patch the inferenceservice-config ConfigMap and delete the KServe
# controller pod(s).
#
# The former rule for apiGroup "redhat.com" / resource "dsci" was removed: it
# matched no real API resource. Read access to DSCInitialization is granted by
# the ClusterRole `modelcar-dsc-read` in this same file.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: modelcar-enable-patch-role
  namespace: redhat-ods-applications
  annotations:
    argocd.argoproj.io/sync-wave: "0"
rules:
  # Restricted to the single ConfigMap the Job reads and patches.
  - apiGroups: [""]
    resources: ["configmaps"]
    resourceNames: ["inferenceservice-config"]
    verbs: ["get", "patch"]
  # Pods are deleted by label selector, which needs list as well as delete;
  # resourceNames cannot be used because pod names are not known in advance.
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "delete"]
---
# Grants modelcar-enable-patch-role (in redhat-ods-applications) to the
# modelcar-enable-sa ServiceAccount that lives in ic-shared-llm.
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  namespace: redhat-ods-applications
  name: modelcar-enable-patch-rolebinding
  annotations:
    argocd.argoproj.io/sync-wave: "0"
roleRef:
  kind: Role
  name: modelcar-enable-patch-role
  apiGroup: rbac.authorization.k8s.io
subjects:
  - kind: ServiceAccount
    namespace: ic-shared-llm
    name: modelcar-enable-sa
---
# Cluster-wide read-only access to DSCInitialization resources.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: modelcar-dsc-read
  annotations:
    argocd.argoproj.io/sync-wave: "0"
rules:
  - apiGroups:
      - dscinitialization.opendatahub.io
    resources:
      - dscinitializations
    verbs:
      - get
      - list
      - watch
---
# Binds the modelcar-dsc-read ClusterRole to the modelcar-enable-sa
# ServiceAccount in ic-shared-llm.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: modelcar-dsc-read-binding
  annotations:
    argocd.argoproj.io/sync-wave: "0"
roleRef:
  kind: ClusterRole
  name: modelcar-dsc-read
  apiGroup: rbac.authorization.k8s.io
subjects:
  - kind: ServiceAccount
    namespace: ic-shared-llm
    name: modelcar-enable-sa
3 changes: 3 additions & 0 deletions bootstrap/ic-shared-llm/service-hftgi.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
kind: Service
apiVersion: v1

metadata:
name: llm-flant5
namespace: ic-shared-llm
labels:
app: llm-flant5
annotations:
argocd.argoproj.io/sync-wave: "2"
spec:
clusterIP: None
ipFamilies:
Expand Down
Loading
Loading