diff --git a/bootstrap/ic-shared-llm/deployment-hftgi.yaml b/bootstrap/ic-shared-llm/deployment-hftgi.yaml index 32d9d93f..845af92a 100644 --- a/bootstrap/ic-shared-llm/deployment-hftgi.yaml +++ b/bootstrap/ic-shared-llm/deployment-hftgi.yaml @@ -5,6 +5,8 @@ metadata: namespace: ic-shared-llm labels: app: llm-flant5 + annotations: + argocd.argoproj.io/sync-wave: "2" spec: replicas: 1 selector: diff --git a/bootstrap/ic-shared-llm/deployment.yaml b/bootstrap/ic-shared-llm/deployment.yaml deleted file mode 100644 index 6ba8c63c..00000000 --- a/bootstrap/ic-shared-llm/deployment.yaml +++ /dev/null @@ -1,111 +0,0 @@ -kind: Deployment -apiVersion: apps/v1 -metadata: - name: llm - namespace: ic-shared-llm - labels: - app: llm -spec: - replicas: 1 - selector: - matchLabels: - app: llm - template: - metadata: - creationTimestamp: null - labels: - app: llm - spec: - restartPolicy: Always - schedulerName: default-scheduler - affinity: {} - terminationGracePeriodSeconds: 120 - securityContext: {} - containers: - - resources: - limits: - cpu: '8' - memory: 24Gi - nvidia.com/gpu: '1' - requests: - cpu: '6' - readinessProbe: - httpGet: - path: /health - port: http - scheme: HTTP - timeoutSeconds: 5 - periodSeconds: 30 - successThreshold: 1 - failureThreshold: 3 - terminationMessagePath: /dev/termination-log - name: server - livenessProbe: - httpGet: - path: /health - port: http - scheme: HTTP - timeoutSeconds: 8 - periodSeconds: 100 - successThreshold: 1 - failureThreshold: 3 - env: - - name: HUGGING_FACE_HUB_TOKEN - valueFrom: - secretKeyRef: - name: hftoken - key: token - args: [ - "--model", - "mistralai/Mistral-7B-Instruct-v0.2", - "--download-dir", - "/models-cache", - "--dtype", "float16", - "--max-model-len", "6144" ] - securityContext: - capabilities: - drop: - - ALL - runAsNonRoot: true - allowPrivilegeEscalation: false - seccompProfile: - type: RuntimeDefault - ports: - - name: http - containerPort: 8000 - protocol: TCP - imagePullPolicy: IfNotPresent - startupProbe: - 
httpGet: - path: /health - port: http - scheme: HTTP - timeoutSeconds: 1 - periodSeconds: 30 - successThreshold: 1 - failureThreshold: 24 - initialDelaySeconds: 60 - volumeMounts: - - name: models-cache - mountPath: /models-cache - - name: shm - mountPath: /dev/shm - terminationMessagePolicy: File - image: 'quay.io/rh-aiservices-bu/vllm-openai-ubi9:0.4.2' - volumes: - - name: models-cache - persistentVolumeClaim: - claimName: models-cache - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi - dnsPolicy: ClusterFirst - tolerations: - - key: nvidia.com/gpu - operator: Exists - effect: NoSchedule - strategy: - type: Recreate - revisionHistoryLimit: 10 - progressDeadlineSeconds: 600 \ No newline at end of file diff --git a/bootstrap/ic-shared-llm/fix-odf-config.yaml b/bootstrap/ic-shared-llm/fix-odf-config.yaml index c2e6284a..2dfd4175 100644 --- a/bootstrap/ic-shared-llm/fix-odf-config.yaml +++ b/bootstrap/ic-shared-llm/fix-odf-config.yaml @@ -3,6 +3,8 @@ apiVersion: v1 metadata: name: rook-ceph-operator-config namespace: openshift-storage + annotations: + argocd.argoproj.io/sync-wave: "0" data: CSI_PLUGIN_TOLERATIONS: | - key: nvidia.com/gpu diff --git a/bootstrap/ic-shared-llm/inference-service-granite-modelcar.yaml b/bootstrap/ic-shared-llm/inference-service-granite-modelcar.yaml new file mode 100644 index 00000000..b4817bc0 --- /dev/null +++ b/bootstrap/ic-shared-llm/inference-service-granite-modelcar.yaml @@ -0,0 +1,38 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + annotations: + openshift.io/display-name: granite-7b-instruct + serving.knative.openshift.io/enablePassthrough: 'true' + sidecar.istio.io/inject: 'true' + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + argocd.argoproj.io/sync-wave: "2" + argocd.argoproj.io/compare-options: IgnoreExtraneous + argocd.argoproj.io/sync-options: Prune=false + name: granite-7b-instruct + namespace: ic-shared-llm + labels: + opendatahub.io/dashboard: 'true' +spec: + predictor: + 
maxReplicas: 1 + minReplicas: 1 + model: + modelFormat: + name: vLLM + name: '' + resources: + limits: + cpu: '6' + memory: 24Gi + nvidia.com/gpu: '1' + requests: + cpu: '1' + memory: 8Gi + nvidia.com/gpu: '1' + runtime: vllm + storageUri: oci://quay.io/rh-aiservices-bu/granite-7b-instruct-modelcar:0.1 + tolerations: + - effect: NoSchedule + key: nvidia.com/gpu + operator: Exists \ No newline at end of file diff --git a/bootstrap/ic-shared-llm/job-enable-modelcar.yaml b/bootstrap/ic-shared-llm/job-enable-modelcar.yaml new file mode 100644 index 00000000..7a90611c --- /dev/null +++ b/bootstrap/ic-shared-llm/job-enable-modelcar.yaml @@ -0,0 +1,49 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: patch-inferenceservice-config + namespace: ic-shared-llm +  annotations: +    argocd.argoproj.io/sync-wave: "1" +    argocd.argoproj.io/hook: Sync +    argocd.argoproj.io/hook-delete-policy: HookSucceeded +spec: + backoffLimit: 4 + template: + spec: + serviceAccount: modelcar-enable-sa + serviceAccountName: modelcar-enable-sa + containers: + - name: patch-configmap + image: registry.redhat.io/openshift4/ose-cli:v4.15.0 + command: ["/bin/sh", "-c"] + args: + - | + # Wait for the operator to be in "Ready" state + echo "Waiting for the operator to be Ready..." + until [ "$(oc get dsci -n redhat-ods-applications default-dsci -o jsonpath='{.status.phase}')" = "Ready" ]; do + echo "Operator not ready, retrying in 10s..." + sleep 10 + done + echo "Operator is Ready!" + + # Fetch current storageInitializer config + config=$(oc get configmap inferenceservice-config -n redhat-ods-applications -o jsonpath='{.data.storageInitializer}') + + # Check if "enableModelcar" is already enabled + if echo "$config" | grep '"enableModelcar": false'; then + echo "Patching configmap to enable modelcar..." 
+ + # Modify the config to enable modelcar using sed + newValue=$(echo "$config" | sed 's/"enableModelcar": false/"enableModelcar": true/') + newValueEscaped=$(echo "$newValue" | sed 's/\"/\\\"/g') + + # Patch the configmap with the new value + oc patch configmap inferenceservice-config -n redhat-ods-applications --type='json' -p "[{\"op\": \"replace\", \"path\": \"/data/storageInitializer\", \"value\": \"$newValueEscaped\"}]" + else + echo "Modelcar is already enabled, no patching needed." + fi + + # Restart the KServe controller to apply changes + oc delete pod -n redhat-ods-applications -l control-plane=kserve-controller-manager + restartPolicy: OnFailure diff --git a/bootstrap/ic-shared-llm/kustomization.yaml b/bootstrap/ic-shared-llm/kustomization.yaml index 46daabff..02d3b969 100644 --- a/bootstrap/ic-shared-llm/kustomization.yaml +++ b/bootstrap/ic-shared-llm/kustomization.yaml @@ -9,12 +9,12 @@ resources: # wave 0 - namespace.yaml - fix-odf-config.yaml -- token.yaml +- rbac-job-enable-modelcar.yaml # wave 1 -- pvc.yaml +- job-enable-modelcar.yaml +# wave 2 +- service-runtime-vllm-granite-modelcar.yaml - pvc-hftgi.yaml -- deployment.yaml -- service.yaml +- inference-service-granite-modelcar.yaml - deployment-hftgi.yaml -- service-hftgi.yaml -# wave 2 +- service-hftgi.yaml \ No newline at end of file diff --git a/bootstrap/ic-shared-llm/pvc-hftgi.yaml b/bootstrap/ic-shared-llm/pvc-hftgi.yaml index 0a1950bb..d503bdc3 100644 --- a/bootstrap/ic-shared-llm/pvc-hftgi.yaml +++ b/bootstrap/ic-shared-llm/pvc-hftgi.yaml @@ -7,7 +7,7 @@ metadata: labels: app: ic-shared-llm annotations: - argocd.argoproj.io/sync-wave: "0" + argocd.argoproj.io/sync-wave: "2" spec: accessModes: - ReadWriteMany diff --git a/bootstrap/ic-shared-llm/pvc.yaml b/bootstrap/ic-shared-llm/pvc.yaml deleted file mode 100644 index be8f7a86..00000000 --- a/bootstrap/ic-shared-llm/pvc.yaml +++ /dev/null @@ -1,18 +0,0 @@ ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: models-cache 
- namespace: ic-shared-llm - labels: - app: ic-shared-llm - annotations: - argocd.argoproj.io/sync-wave: "0" -spec: - accessModes: - - ReadWriteMany - resources: - requests: - storage: 50Gi - storageClassName: ocs-storagecluster-cephfs - volumeMode: Filesystem \ No newline at end of file diff --git a/bootstrap/ic-shared-llm/rbac-job-enable-modelcar.yaml b/bootstrap/ic-shared-llm/rbac-job-enable-modelcar.yaml new file mode 100644 index 00000000..33c1e457 --- /dev/null +++ b/bootstrap/ic-shared-llm/rbac-job-enable-modelcar.yaml @@ -0,0 +1,68 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: modelcar-enable-sa + namespace: ic-shared-llm + annotations: + argocd.argoproj.io/sync-wave: "0" +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: modelcar-enable-patch-role + namespace: redhat-ods-applications + annotations: + argocd.argoproj.io/sync-wave: "0" +rules: +- apiGroups: ["redhat.com"] + resources: ["dsci"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "patch"] +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "delete"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: modelcar-enable-patch-rolebinding + namespace: redhat-ods-applications + annotations: + argocd.argoproj.io/sync-wave: "0" +subjects: +- kind: ServiceAccount + name: modelcar-enable-sa + namespace: ic-shared-llm +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: modelcar-enable-patch-role # Fixed to bind the correct Role +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: modelcar-dsc-read + annotations: + argocd.argoproj.io/sync-wave: "0" +rules: +- apiGroups: ["dscinitialization.opendatahub.io"] + resources: ["dscinitializations"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: modelcar-dsc-read-binding + annotations: + 
argocd.argoproj.io/sync-wave: "0" +subjects: +- kind: ServiceAccount + name: modelcar-enable-sa + namespace: ic-shared-llm +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: modelcar-dsc-read diff --git a/bootstrap/ic-shared-llm/service-hftgi.yaml b/bootstrap/ic-shared-llm/service-hftgi.yaml index 08b3b8ba..01700f92 100644 --- a/bootstrap/ic-shared-llm/service-hftgi.yaml +++ b/bootstrap/ic-shared-llm/service-hftgi.yaml @@ -1,10 +1,13 @@ kind: Service apiVersion: v1 + metadata: name: llm-flant5 namespace: ic-shared-llm labels: app: llm-flant5 + annotations: + argocd.argoproj.io/sync-wave: "2" spec: clusterIP: None ipFamilies: diff --git a/bootstrap/ic-shared-llm/service-runtime-vllm-granite-modelcar.yaml b/bootstrap/ic-shared-llm/service-runtime-vllm-granite-modelcar.yaml new file mode 100644 index 00000000..26c01bf1 --- /dev/null +++ b/bootstrap/ic-shared-llm/service-runtime-vllm-granite-modelcar.yaml @@ -0,0 +1,50 @@ +--- +apiVersion: serving.kserve.io/v1alpha1 +kind: ServingRuntime +metadata: + annotations: + opendatahub.io/accelerator-name: migrated-gpu + opendatahub.io/apiProtocol: REST + opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' + opendatahub.io/template-display-name: vLLM ServingRuntime for KServe + opendatahub.io/template-name: vllm-runtime + openshift.io/display-name: vllm + argocd.argoproj.io/sync-wave: "2" + name: vllm + namespace: ic-shared-llm + labels: + opendatahub.io/dashboard: 'true' +spec: + annotations: + prometheus.io/path: /metrics + prometheus.io/port: '8080' + containers: + - args: + - '--port=8080' + - '--model=/mnt/models' + - '--served-model-name={{.Name}}' + - '--distributed-executor-backend=mp' + command: + - python + - '-m' + - vllm.entrypoints.openai.api_server + env: + - name: HF_HOME + value: /tmp/hf_home + image: 'quay.io/modh/vllm@sha256:b51fde66f162f1a78e8c027320dddf214732d5345953b1599a84fe0f0168c619' + name: kserve-container + ports: + - containerPort: 8080 + protocol: TCP + 
volumeMounts: + - mountPath: /dev/shm + name: shm + multiModel: false + supportedModelFormats: + - autoSelect: true + name: vLLM + volumes: + - emptyDir: + medium: Memory + sizeLimit: 2Gi + name: shm \ No newline at end of file diff --git a/bootstrap/ic-shared-llm/service.yaml b/bootstrap/ic-shared-llm/service.yaml deleted file mode 100644 index 10a107a9..00000000 --- a/bootstrap/ic-shared-llm/service.yaml +++ /dev/null @@ -1,21 +0,0 @@ -kind: Service -apiVersion: v1 -metadata: - name: llm - namespace: ic-shared-llm - labels: - app: llm -spec: - clusterIP: None - ipFamilies: - - IPv4 - ports: - - name: http - protocol: TCP - port: 8000 - targetPort: http - type: ClusterIP - ipFamilyPolicy: SingleStack - sessionAffinity: None - selector: - app: llm \ No newline at end of file diff --git a/bootstrap/ic-shared-llm/token.yaml b/bootstrap/ic-shared-llm/token.yaml deleted file mode 100644 index bc705ffb..00000000 --- a/bootstrap/ic-shared-llm/token.yaml +++ /dev/null @@ -1,8 +0,0 @@ -kind: Secret -apiVersion: v1 -metadata: - name: hftoken - namespace: ic-shared-llm -data: - token: aGZfUkhKeElqSElXcGJXb3NKVlJsa2VLQ2VVcmlxZ2JsS0VDRgo= -type: Opaque \ No newline at end of file