rh-aiservices-bu · rcarrata · Sep 27, 2024 · Sep 27, 2024 · Sep 27, 2024 · Sep 27, 2024
diff --git a/bootstrap/ic-shared-llm/deployment-hftgi.yaml b/bootstrap/ic-shared-llm/deployment-hftgi.yaml
@@ -5,6 +5,8 @@ metadata:
   namespace: ic-shared-llm
   labels:
     app: llm-flant5
+  annotations:
+    argocd.argoproj.io/sync-wave: "2"
 spec:
   replicas: 1
   selector:

diff --git a/bootstrap/ic-shared-llm/deployment.yaml b/bootstrap/ic-shared-llm/deployment.yaml
diff --git a/bootstrap/ic-shared-llm/fix-odf-config.yaml b/bootstrap/ic-shared-llm/fix-odf-config.yaml
@@ -3,6 +3,8 @@ apiVersion: v1
 metadata:
   name: rook-ceph-operator-config
   namespace: openshift-storage
+  annotations:
+    argocd.argoproj.io/sync-wave: "0"
 data:
   CSI_PLUGIN_TOLERATIONS: |
     - key: nvidia.com/gpu

diff --git a/bootstrap/ic-shared-llm/inference-service-granite-modelcar.yaml b/bootstrap/ic-shared-llm/inference-service-granite-modelcar.yaml
@@ -0,0 +1,44 @@
+---
+# KServe InferenceService that serves granite-7b-instruct with the vllm
+# runtime, pulling the model from an OCI (modelcar) image.
+apiVersion: serving.kserve.io/v1beta1
+kind: InferenceService
+metadata:
+  name: granite-7b-instruct
+  namespace: ic-shared-llm
+  labels:
+    opendatahub.io/dashboard: 'true'
+  annotations:
+    # Argo CD sync ordering and comparison/prune options.
+    argocd.argoproj.io/compare-options: IgnoreExtraneous
+    argocd.argoproj.io/sync-options: Prune=false
+    argocd.argoproj.io/sync-wave: "2"
+    # Display name plus Knative/Istio sidecar settings.
+    openshift.io/display-name: granite-7b-instruct
+    serving.knative.openshift.io/enablePassthrough: 'true'
+    sidecar.istio.io/inject: 'true'
+    sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+spec:
+  predictor:
+    minReplicas: 1
+    maxReplicas: 1
+    # Tolerate the nvidia.com/gpu NoSchedule taint.
+    tolerations:
+      - key: nvidia.com/gpu
+        operator: Exists
+        effect: NoSchedule
+    model:
+      name: ''
+      runtime: vllm
+      modelFormat:
+        name: vLLM
+      storageUri: oci://quay.io/rh-aiservices-bu/granite-7b-instruct-modelcar:0.1
+      resources:
+        requests:
+          cpu: '1'
+          memory: 8Gi
+          nvidia.com/gpu: '1'
+        limits:
+          cpu: '6'
+          memory: 24Gi
+          nvidia.com/gpu: '1'
diff --git a/bootstrap/ic-shared-llm/job-enable-modelcar.yaml b/bootstrap/ic-shared-llm/job-enable-modelcar.yaml
@@ -0,0 +1,62 @@
+---
+# Argo CD Sync hook that turns on KServe "modelcar" support by flipping
+# "enableModelcar" to true in the storageInitializer entry of the
+# inferenceservice-config ConfigMap, then restarts the KServe controller.
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: patch-inferenceservice-config
+  namespace: ic-shared-llm
+  # Annotations belong under metadata. Declared at the top level of the
+  # manifest they are not object metadata, so the sync wave and hook
+  # settings would never take effect.
+  annotations:
+    argocd.argoproj.io/sync-wave: "1"
+    argocd.argoproj.io/hook: Sync
+    argocd.argoproj.io/hook-delete-policy: HookSucceeded
+spec:
+  backoffLimit: 4
+  template:
+    spec:
+      # serviceAccountName alone is sufficient; the serviceAccount field is a
+      # deprecated alias for it.
+      serviceAccountName: modelcar-enable-sa
+      restartPolicy: OnFailure
+      containers:
+        - name: patch-configmap
+          image: registry.redhat.io/openshift4/ose-cli:v4.15.0
+          command: ["/bin/sh", "-c"]
+          args:
+            - |
+              # Fail the Job (and let it retry) when an oc call errors out,
+              # instead of reporting success without having patched anything.
+              set -e

+              # Wait for the operator to be in "Ready" state
+              echo "Waiting for the operator to be Ready..."
+              until [ "$(oc get dsci -n redhat-ods-applications default-dsci -o jsonpath='{.status.phase}')" = "Ready" ]; do
+                echo "Operator not ready, retrying in 10s..."
+                sleep 10
+              done
+              echo "Operator is Ready!"

+              # Fetch current storageInitializer config
+              config=$(oc get configmap inferenceservice-config -n redhat-ods-applications -o jsonpath='{.data.storageInitializer}')

+              # Patch only while "enableModelcar" is still false (-q keeps the
+              # matched config line out of the Job log)
+              if echo "$config" | grep -q '"enableModelcar": false'; then
+                echo "Patching configmap to enable modelcar..."

+                # Modify the config to enable modelcar using sed
+                newValue=$(echo "$config" | sed 's/"enableModelcar": false/"enableModelcar": true/')
+                newValueEscaped=$(echo "$newValue" | sed 's/\"/\\\"/g')

+                # Patch the configmap with the new value
+                oc patch configmap inferenceservice-config -n redhat-ods-applications --type='json' -p "[{\"op\": \"replace\", \"path\": \"/data/storageInitializer\", \"value\": \"$newValueEscaped\"}]"
+              else
+                echo "Modelcar is already enabled, no patching needed."
+              fi

+              # Restart the KServe controller to apply changes
+              oc delete pod -n redhat-ods-applications -l control-plane=kserve-controller-manager
diff --git a/bootstrap/ic-shared-llm/kustomization.yaml b/bootstrap/ic-shared-llm/kustomization.yaml
@@ -9,12 +9,12 @@ resources:
 # wave 0
 - namespace.yaml
 - fix-odf-config.yaml
-- token.yaml
+- rbac-job-enable-modelcar.yaml
 # wave 1
-- pvc.yaml
+- job-enable-modelcar.yaml
+# wave 2
+- service-runtime-vllm-granite-modelcar.yaml
 - pvc-hftgi.yaml
-- deployment.yaml
-- service.yaml
+- inference-service-granite-modelcar.yaml
 - deployment-hftgi.yaml
-- service-hftgi.yaml
-# wave 2
+- service-hftgi.yaml
diff --git a/bootstrap/ic-shared-llm/pvc-hftgi.yaml b/bootstrap/ic-shared-llm/pvc-hftgi.yaml
@@ -7,7 +7,7 @@ metadata:
   labels:
     app: ic-shared-llm
   annotations:
-    argocd.argoproj.io/sync-wave: "0"
+    argocd.argoproj.io/sync-wave: "2"
 spec:
   accessModes:
     - ReadWriteMany

diff --git a/bootstrap/ic-shared-llm/pvc.yaml b/bootstrap/ic-shared-llm/pvc.yaml
diff --git a/bootstrap/ic-shared-llm/rbac-job-enable-modelcar.yaml b/bootstrap/ic-shared-llm/rbac-job-enable-modelcar.yaml
@@ -0,0 +1,76 @@
+---
+# Service account the patch-inferenceservice-config Job runs as.
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: modelcar-enable-sa
+  namespace: ic-shared-llm
+  annotations:
+    argocd.argoproj.io/sync-wave: "0"
+---
+# Namespaced permissions the Job needs in redhat-ods-applications.
+# DSCInitialization read access is granted separately by the
+# modelcar-dsc-read ClusterRole.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: modelcar-enable-patch-role
+  namespace: redhat-ods-applications
+  annotations:
+    argocd.argoproj.io/sync-wave: "0"
+rules:
+# Read and patch only the ConfigMap the Job edits.
+- apiGroups: [""]
+  resources: ["configmaps"]
+  resourceNames: ["inferenceservice-config"]
+  verbs: ["get", "patch"]
+# Delete pods by label selector to restart the KServe controller.
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "list", "delete"]
+---
+# Grants modelcar-enable-patch-role to the service account in ic-shared-llm.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: modelcar-enable-patch-rolebinding
+  namespace: redhat-ods-applications
+  annotations:
+    argocd.argoproj.io/sync-wave: "0"
+subjects:
+- kind: ServiceAccount
+  name: modelcar-enable-sa
+  namespace: ic-shared-llm
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: modelcar-enable-patch-role
+---
+# Cluster-wide read access to DSCInitialization objects, which the Job
+# polls until the operator reports Ready.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: modelcar-dsc-read
+  annotations:
+    argocd.argoproj.io/sync-wave: "0"
+rules:
+- apiGroups: ["dscinitialization.opendatahub.io"]
+  resources: ["dscinitializations"]
+  verbs: ["get", "list", "watch"]
+---
+# Binds modelcar-dsc-read to the Job's service account.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: modelcar-dsc-read-binding
+  annotations:
+    argocd.argoproj.io/sync-wave: "0"
+subjects:
+- kind: ServiceAccount
+  name: modelcar-enable-sa
+  namespace: ic-shared-llm
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: modelcar-dsc-read
diff --git a/bootstrap/ic-shared-llm/service-hftgi.yaml b/bootstrap/ic-shared-llm/service-hftgi.yaml
@@ -1,10 +1,13 @@
 kind: Service
 apiVersion: v1
+
 metadata:
   name: llm-flant5
   namespace: ic-shared-llm
   labels:
     app: llm-flant5
+  annotations:
+    argocd.argoproj.io/sync-wave: "2"
 spec:
   clusterIP: None
   ipFamilies: