diff --git a/assets/storage_class_template.yaml b/assets/storage_class_template.yaml deleted file mode 100644 index 3b8bb85..0000000 --- a/assets/storage_class_template.yaml +++ /dev/null @@ -1,17 +0,0 @@ -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: $sc_name -provisioner: driver.longhorn.io -allowVolumeExpansion: true -reclaimPolicy: Delete -volumeBindingMode: WaitForFirstConsumer -parameters: - shareManagerNodeSelector: $sc_label_selector - numberOfReplicas: "$sc_replicas" - staleReplicaTimeout: "2880" - fromBackup: "" - fsType: "ext4" - #nfsOptions: "vers=4.2,noresvport,softerr,timeo=600,retrans=5" - #nfsOptions: "hard,timeo=50,retrans=1" - diff --git a/kalavai_client/cli.py b/kalavai_client/cli.py index a267b41..d92262a 100644 --- a/kalavai_client/cli.py +++ b/kalavai_client/cli.py @@ -3,7 +3,6 @@ import os import json import uuid -from string import Template import time import socket from pathlib import Path @@ -54,10 +53,6 @@ WATCHER_PORT_KEY, MANDATORY_TOKEN_FIELDS, USER_NODE_LABEL_KEY, - DEPLOY_HELIOS_KEY, - LONGHORN_UI_PORT_KEY, - LONGHORN_MANAGER_PORT_KEY, - KALAVAI_API_ENDPOINT_KEY, IS_PUBLIC_POOL_KEY ) from kalavai_client.cluster import ( @@ -66,26 +61,26 @@ KALAVAI_PLATFORM_URL = os.getenv("KALAVAI_PLATFORM_URL", "https://platform.kalavai.net") -KALAVAI_API_ENDPOINT = os.getenv("KALAVAI_API_ENDPOINT", "https://platform.kalavai.net/_/api") LOCAL_TEMPLATES_DIR = os.getenv("LOCAL_TEMPLATES_DIR", None) VERSION = 1 RESOURCE_EXCLUDE = ["ephemeral-storage", "hugepages-1Gi", "hugepages-2Mi", "pods"] CORE_NAMESPACES = ["lws-system", "kube-system", "gpu-operator", "kalavai"] -TEMPLATE_LABEL = "kalavai.lws.name" +TEMPLATE_LABEL = "kalavai.job.name" RAY_LABEL = "kalavai.ray.name" PVC_NAME_LABEL = "kalavai.storage.name" +POOL_CONFIG_TEMPLATE = resource_path("assets/pool_config_template.yaml") +POOL_CONFIG_DEFAULT_VALUES = resource_path("assets/pool_config_values.yaml") STORAGE_CLASS_NAME = "longhorn-rwx" STORAGE_CLASS_LABEL = "kalavai.storage.enabled" DEFAULT_STORAGE_NAME = "pool-cache" DEFAULT_STORAGE_SIZE = 5 -DEFAULT_STORAGE_REPLICAS = 1 USER_NODE_LABEL = "kalavai.cluster.user" KUBE_VERSION = os.getenv("KALAVAI_KUBE_VERSION", "v1.31.1+k3s1") DEFAULT_FLANNEL_IFACE = os.getenv("KALAVAI_FLANNEL_IFACE", "netmaker") FORBIDEDEN_IPS = ["127.0.0.1"] # kalavai templates HELM_APPS_FILE = resource_path("assets/apps.yaml") -STORAGE_CLASS_TEMPLATE_FILE = resource_path("assets/storage_class_template.yaml") +HELM_APPS_VALUES = resource_path("assets/apps_values.yaml") # user specific config files USER_HELM_APPS_FILE = user_path("apps.yaml") USER_KUBECONFIG_FILE = user_path("kubeconfig") @@ -192,6 +187,30 @@ def init_user_workspace(): except Exception as e: console.log(f"[red]Error when connecting to kalavai service: {str(e)}") +def pool_init(): + """Deploy configured objects to initialise pool""" + # load template config and populate with values + sidecar_template_yaml = load_template( + template_path=POOL_CONFIG_TEMPLATE, + values={}, + default_values_path=POOL_CONFIG_DEFAULT_VALUES) + + try: + result = request_to_server( + method="post", + endpoint="/v1/deploy_generic_model", + data={"config": sidecar_template_yaml}, + server_creds=USER_LOCAL_SERVER_FILE, + user_cookie=USER_COOKIE + ) + if len(result['failed']) > 0: + console.log(f"[red]Error when deploying pool config\n\n{result['failed']}") + if len(result['successful']) > 0: + console.log(f"[green]Deployed pool config!") + except Exception as e: + console.log(f"[red]Error when connecting to kalavai service: {str(e)}") + + 
def select_ip_address(subnet=None): ips = [] for iface in ni.interfaces(): @@ -414,7 +433,7 @@ def pool__list(*others, user_only=False): console.log("[white]Use [yellow]kalavai pool join [white]to join a public pool") @arguably.command -def pool__start(cluster_name, *others, ip_address: str=None, location: str=None): +def pool__start(cluster_name, *others, ip_address: str=None, location: str=None, app_values: str=HELM_APPS_VALUES): """ Start Kalavai pool and start/resume sharing resources. @@ -455,8 +474,6 @@ def pool__start(cluster_name, *others, ip_address: str=None, location: str=None write_auth_key = str(uuid.uuid4()) readonly_auth_key = str(uuid.uuid4()) watcher_port = 31000 - longhorn_ui_port = 30000 - longhorn_manager_port = 30001 watcher_service = f"{ip_address}:{watcher_port}" values = { CLUSTER_NAME_KEY: cluster_name, @@ -465,13 +482,9 @@ def pool__start(cluster_name, *others, ip_address: str=None, location: str=None READONLY_AUTH_KEY: readonly_auth_key, WRITE_AUTH_KEY: write_auth_key, WATCHER_PORT_KEY: watcher_port, - LONGHORN_UI_PORT_KEY: longhorn_ui_port, - LONGHORN_MANAGER_PORT_KEY: longhorn_manager_port, WATCHER_SERVICE_KEY: watcher_service, USER_NODE_LABEL_KEY: USER_NODE_LABEL, - DEPLOY_HELIOS_KEY: location is not None, - IS_PUBLIC_POOL_KEY: location is not None, - KALAVAI_API_ENDPOINT_KEY: KALAVAI_API_ENDPOINT + IS_PUBLIC_POOL_KEY: location is not None } # 1. start k3s server @@ -499,12 +512,14 @@ def pool__start(cluster_name, *others, ip_address: str=None, location: str=None console.log("Install dependencies...") # set template values in helmfile - with open(HELM_APPS_FILE, "r") as f: - config = Template(f.read()) - config = config.substitute(values) + helm_yaml = load_template( + template_path=HELM_APPS_FILE, + values=values, + default_values_path=app_values, + force_defaults=True) with open(USER_HELM_APPS_FILE, "w") as f: - f.write(config) + f.write(helm_yaml) CLUSTER.update_dependencies( dependencies_file=USER_HELM_APPS_FILE ) @@ -526,8 +541,8 @@ def pool__start(cluster_name, *others, ip_address: str=None, location: str=None break console.log("Initialise user workspace...") init_user_workspace() - console.log(f"Initialising storage: {DEFAULT_STORAGE_NAME} ({DEFAULT_STORAGE_SIZE}Gi)...") - storage__init() + console.log(f"Initialising pool config...") + pool_init() storage__create() return None @@ -913,40 +928,6 @@ def pool__status(*others, log_file=None): for log in logs: console.log(f"{log}\n") -@arguably.command -def storage__init(replicas=DEFAULT_STORAGE_REPLICAS, *others): - """ - Create storage for the cluster - """ - try: - CLUSTER.validate_cluster() - except Exception as e: - console.log(f"[red]Problems with your pool: {str(e)}") - return - - sidecar_template_yaml = load_template( - template_path=STORAGE_CLASS_TEMPLATE_FILE, - values={ - "sc_name": STORAGE_CLASS_NAME, - "sc_label_selector": f"{STORAGE_CLASS_LABEL}:True", - "sc_replicas": replicas - } - ) - try: - result = request_to_server( - method="post", - endpoint="/v1/deploy_generic_model", - data={"config": sidecar_template_yaml}, - server_creds=USER_LOCAL_SERVER_FILE, - user_cookie=USER_COOKIE - ) - if len(result['failed']) > 0: - console.log(f"[red]Error when creating storage class\n\n{result['failed']}") - if len(result['successful']) > 0: - console.log(f"[green]Created storage class: {STORAGE_CLASS_NAME} ({replicas} replicas)") - except Exception as e: - console.log(f"[red]Error when connecting to kalavai service: {str(e)}") - @arguably.command def storage__create(name=DEFAULT_STORAGE_NAME, 
storage=DEFAULT_STORAGE_SIZE, *others): """ @@ -1230,9 +1211,12 @@ def generate_gpu_annotation(input_message, values, value_key, annotation_key): # deploy template with kube-watcher data = { "object": { - "group": "leaderworkerset.x-k8s.io", - "api_version": "v1", - "plural": "leaderworkersets" + "group": "batch.volcano.sh", + "api_version": "v1alpha1", + "plural": "jobs" + # "group": "leaderworkerset.x-k8s.io", + # "api_version": "v1", + # "plural": "leaderworkersets" }, "body": template_yaml } @@ -1366,9 +1350,12 @@ def job__list(*others): return data = { - "group": "leaderworkerset.x-k8s.io", - "api_version": "v1", - "plural": "leaderworkersets", + "group": "batch.volcano.sh", + "api_version": "v1alpha1", + "plural": "jobs" + # "group": "leaderworkerset.x-k8s.io", + # "api_version": "v1", + # "plural": "leaderworkersets", } try: result = request_to_server( @@ -1378,7 +1365,7 @@ def job__list(*others): server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE ) - deployment_names = [d["metadata"]["name"] for d in result["items"]] + deployment_names = [d["metadata"]["labels"][TEMPLATE_LABEL] for d in result["items"]] except Exception as e: console.log(f"[red]Error when connecting to kalavai service: {str(e)}") @@ -1394,9 +1381,12 @@ def job__list(*others): try: # get status for deployment data = { - "group": "leaderworkerset.x-k8s.io", - "api_version": "v1", - "plural": "leaderworkersets", + "group": "batch.volcano.sh", + "api_version": "v1alpha1", + "plural": "jobs", + # "group": "leaderworkerset.x-k8s.io", + # "api_version": "v1", + # "plural": "leaderworkersets", "name": deployment } result = request_to_server( @@ -1408,14 +1398,15 @@ def job__list(*others): ) if len(result) > 0: last = result[-1] - statuses = f"{last['type']}: {last['message']}" + statuses = f"[{last['lastTransitionTime']}] {last['status']}" else: statuses = "Unknown" # get pod statuses data = { - "label": "leaderworkerset.sigs.k8s.io/name", + "label": TEMPLATE_LABEL, "value": deployment } + # TODO result = request_to_server( method="post", endpoint="/v1/get_pods_status_for_label", @@ -1458,7 +1449,7 @@ def job__list(*others): @arguably.command -def job__logs(name, *others, pod_name=None, stream=False): +def job__logs(name, *others, pod_name=None, stream=False, tail=100): """ Get logs for a specific job """ @@ -1469,11 +1460,12 @@ def job__logs(name, *others, pod_name=None, stream=False): return data = { - "label": "leaderworkerset.sigs.k8s.io/name", + "label": TEMPLATE_LABEL, "value": name } while True: try: + # send tail as parameter (fetch only last _tail_ lines) result = request_to_server( method="post", endpoint="/v1/get_logs_for_label", @@ -1515,7 +1507,7 @@ def job__manifest(*others, name): return data = { - "label": "leaderworkerset.sigs.k8s.io/name", + "label": TEMPLATE_LABEL, "value": name } try: diff --git a/kalavai_client/utils.py b/kalavai_client/utils.py index 143d77c..677b2e1 100644 --- a/kalavai_client/utils.py +++ b/kalavai_client/utils.py @@ -36,11 +36,7 @@ READONLY_AUTH_KEY = "watcher_readonly_key" WATCHER_SERVICE_KEY = "watcher_service" WATCHER_PORT_KEY = "watcher_port" -LONGHORN_UI_PORT_KEY = "longhorn_ui_port" -LONGHORN_MANAGER_PORT_KEY = "longhorn_manager_port" -DEPLOY_HELIOS_KEY = "deploy_helios" IS_PUBLIC_POOL_KEY = "is_public_pool" -KALAVAI_API_ENDPOINT_KEY = "kalavai_api_endpoint" MANDATORY_TOKEN_FIELDS = [ CLUSTER_IP_KEY, CLUSTER_TOKEN_KEY, @@ -369,7 +365,10 @@ def store_server_info(server_ip, auth_key, watcher_service, file, node_name, clu }, f) return True -def 
load_template(template_path, values, default_values_path=None): +def populate_template(template_str, values_dict): + return Template(template_str).render(values_dict) + +def load_template(template_path, values, default_values_path=None, force_defaults=False): if not Path(template_path).exists(): raise FileNotFoundError(f"{template_path} does not exist") @@ -381,12 +380,10 @@ def load_template(template_path, values, default_values_path=None): with open(default_values_path, 'r') as f: default_values = yaml.safe_load(f) for default in default_values: - if default["name"] not in values: + if not force_defaults or default["name"] not in values: values[default['name']] = default['default'] - template = Template(yaml_template) - - return template.render(values) + return populate_template(template_str=yaml_template, values_dict=values) def user_confirm(question: str, options: list, multiple: bool=False) -> int: diff --git a/templates/aphrodite/examples/qwen2.5-0.5B.yaml b/templates/aphrodite/examples/qwen2.5-0.5B.yaml index 9ac9712..ff2e07a 100644 --- a/templates/aphrodite/examples/qwen2.5-0.5B.yaml +++ b/templates/aphrodite/examples/qwen2.5-0.5B.yaml @@ -8,10 +8,10 @@ default: "pool-cache" description: "Pool storage to use to cache model weights" -- name: num_workers - value: "1" - default: "1" - description: "Workers per deployment (for tensor parallelism)" +- name: remote_workers + value: "0" + default: "0" + description: "Number of remote workers (for tensor and pipeline parallelism). This is in addition to the main node" - name: repo_id value: Qwen/Qwen2.5-0.5B-Instruct diff --git a/templates/aphrodite/template.yaml b/templates/aphrodite/template.yaml index 4d9b89e..2363676 100644 --- a/templates/aphrodite/template.yaml +++ b/templates/aphrodite/template.yaml @@ -1,16 +1,24 @@ -apiVersion: leaderworkerset.x-k8s.io/v1 -kind: LeaderWorkerSet +apiVersion: batch.volcano.sh/v1alpha1 +kind: Job metadata: name: {{deployment_name}} labels: # must have this label - kalavai.lws.name: {{deployment_name}} + kalavai.job.name: {{deployment_name}} spec: - replicas: {{replicas}} - leaderWorkerTemplate: - size: {{num_workers}} - restartPolicy: RecreateGroupOnPodRestart - leaderTemplate: + queue: {{queue_name}} + #minAvailable: 2 + schedulerName: volcano + plugins: + env: [] + svc: [] + policies: + - event: PodEvicted # Restart the job when a pod is evicted. 
+ action: RestartJob + tasks: + - replicas: 1 # One ps pod specified + name: ps + template: # Definition of the ps pod metadata: annotations: # must have these annotations @@ -18,12 +26,28 @@ spec: {{use_gputype}} labels: role: leader - kalavai.lws.name: {{deployment_name}} + kalavai.job.name: {{deployment_name}} spec: runtimeClassName: nvidia containers: - - name: aphrodite-leader + - command: + - sh + - -c + - | + RAY_BACKEND_LOG_LEVEL=error /home/ray/workspace/ray_init.sh leader --ray_cluster_size=$(({{remote_workers}}+1)) --ray_port=6379 --ray_object_store_memory={{shmem_size}}; + sleep 30; + nvidia-smi; + ray status; + /home/ray/workspace/run_model.sh \ + --repo_id={{repo_id}} \ + --model_filename={{model_filename}} \ + --extra='{{extra}}' \ + --tensor_parallel_size={{tensor_parallel_size}} \ + --pipeline_parallel_size={{pipeline_parallel_size}} \ + --local_dir=/home/ray/cache; + sleep 30 image: docker.io/bundenth/ray-aphrodite:v1.0.11 + name: aphrodite env: - name: HF_TOKEN value: {{hf_token}} @@ -31,40 +55,23 @@ spec: value: /home/ray/cache - name: TMPDIR value: /home/ray/cache/tmp - command: - - sh - - -c - - "/home/ray/workspace/ray_init.sh leader --ray_cluster_size={{num_workers}} --ray_object_store_memory={{shmem_size}}; - sleep 30; - nvidia-smi; - ray status; - /home/ray/workspace/run_model.sh \ - --repo_id={{repo_id}} \ - --model_filename={{model_filename}} \ - --extra='{{extra}}' \ - --tensor_parallel_size={{tensor_parallel_size}} \ - --pipeline_parallel_size={{pipeline_parallel_size}} \ - --local_dir=/home/ray/cache; - sleep 30" - resources: - requests: - cpu: "{{cpus}}" - memory: {{memory}}Gi - nvidia.com/gpu: "{{gpus}}" - nvidia.com/gpucores: 100 - limits: - cpu: "{{cpus}}" - memory: {{memory}}Gi - nvidia.com/gpu: "{{gpus}}" - nvidia.com/gpucores: 100 ports: - # if use 8080 as exposed port (if required) - containerPort: 8080 + name: model-port readinessProbe: tcpSocket: port: 8080 initialDelaySeconds: 90 periodSeconds: 30 + resources: + requests: + cpu: {{cpus}} + memory: {{memory}}Gi + nvidia.com/gpu: {{gpus}} + limits: + cpu: {{cpus}} + memory: {{memory}}Gi + nvidia.com/gpu: {{gpus}} volumeMounts: - mountPath: /dev/shm name: dshm @@ -77,17 +84,32 @@ spec: sizeLimit: {{shmem_size}} - name: cache persistentVolumeClaim: - claimName: {{storage}} - workerTemplate: + claimName: pool-cache + restartPolicy: Never + - replicas: {{remote_workers}} + name: worker + policies: + - event: TaskCompleted # The job will be marked as completed when two worker pods finish tasks. 
+ action: CompleteJob + template: # Definition of worker pods metadata: annotations: # must have these annotations {{nouse_gputype}} {{use_gputype}} + labels: + kalavai.job.name: {{deployment_name}} spec: runtimeClassName: nvidia containers: - - name: aphrodite-worker + - command: + - sh + - -c + - | + PS_HOST=`head /etc/volcano/ps.host`; + WORKER_HOST=`cat /etc/volcano/worker.host | sed 's/$/&/g' | sed 's/^/"/;s/$/"/' | tr "\n" ","`; + nvidia-smi; + RAY_BACKEND_LOG_LEVEL=error /home/ray/workspace/ray_init.sh worker --ray_address=$PS_HOST --ray_port=6379 --ray_object_store_memory={{shmem_size}} --ray_block=1 image: docker.io/bundenth/ray-aphrodite:v1.0.11 env: - name: HF_TOKEN @@ -96,22 +118,19 @@ spec: value: /home/ray/cache - name: TMPDIR value: /home/ray/cache/tmp - command: - - sh - - -c - - "nvidia-smi; - /home/ray/workspace/ray_init.sh worker --ray_address=$LWS_LEADER_ADDRESS --ray_object_store_memory={{shmem_size}} --ray_block=1" + name: aphrodite + # ports: + # - containerPort: 2222 + # name: ray-port resources: requests: - cpu: "{{cpus}}" + cpu: {{cpus}} memory: {{memory}}Gi - nvidia.com/gpu: "{{gpus}}" - nvidia.com/gpucores: 100 + nvidia.com/gpu: {{gpus}} limits: - cpu: "{{cpus}}" + cpu: {{cpus}} memory: {{memory}}Gi - nvidia.com/gpu: "{{gpus}}" - nvidia.com/gpucores: 100 + nvidia.com/gpu: {{gpus}} volumeMounts: - mountPath: /dev/shm name: dshm @@ -119,4 +138,5 @@ spec: - name: dshm emptyDir: medium: Memory - sizeLimit: {{shmem_size}} \ No newline at end of file + sizeLimit: {{shmem_size}} + restartPolicy: Never diff --git a/templates/aphrodite/values.yaml b/templates/aphrodite/values.yaml index 6c29c6e..fe83e1a 100644 --- a/templates/aphrodite/values.yaml +++ b/templates/aphrodite/values.yaml @@ -8,15 +8,15 @@ default: "pool-cache" description: "Pool storage to use to cache model weights" -- name: replicas - value: "1" - default: "1" - description: "How many replicas to deploy for the model" +- name: queue_name + value: "default" + default: "default" + description: "Name of the kalavai queue to use" -- name: num_workers - value: "1" - default: "1" - description: "Workers per deployment (for tensor parallelism)" +- name: remote_workers + value: "0" + default: "0" + description: "Number of remote workers (for tensor and pipeline parallelism). This is in addition to the main node" - name: repo_id value: null diff --git a/templates/dummy/template.yaml b/templates/dummy/template.yaml index 649ff4a..3cb8186 100644 --- a/templates/dummy/template.yaml +++ b/templates/dummy/template.yaml @@ -1,18 +1,24 @@ -# Specs and examples: https://github.com/kubernetes-sigs/lws/blob/main/docs/examples/sample/README.md -apiVersion: leaderworkerset.x-k8s.io/v1 -kind: LeaderWorkerSet +apiVersion: batch.volcano.sh/v1alpha1 +kind: Job metadata: name: {{deployment_name}} labels: # must have this label - kalavai.lws.name: {{deployment_name}} + kalavai.job.name: {{deployment_name}} spec: - # number of copies for this deployment - replicas: 1 - leaderWorkerTemplate: - # how many workers (distributed nodes) - size: 2 - workerTemplate: + queue: default + #minAvailable: 2 + schedulerName: volcano + plugins: + env: [] + svc: [] + policies: + - event: PodEvicted # Restart the job when a pod is evicted. 
+ action: RestartJob + tasks: + - replicas: 1 # One ps pod specified + name: ps + template: # Definition of the ps pod spec: containers: - name: nginx @@ -30,4 +36,5 @@ spec: nvidia.com/gpu: "1" ports: # what port to make available - - containerPort: 8080 \ No newline at end of file + - containerPort: 8080 + restartPolicy: Never \ No newline at end of file diff --git a/templates/vllm/template.yaml b/templates/vllm/template.yaml index a612493..08d8334 100644 --- a/templates/vllm/template.yaml +++ b/templates/vllm/template.yaml @@ -1,16 +1,23 @@ -apiVersion: leaderworkerset.x-k8s.io/v1 -kind: LeaderWorkerSet +apiVersion: batch.volcano.sh/v1alpha1 +kind: Job metadata: name: {{deployment_name}} labels: # must have this label - kalavai.lws.name: {{deployment_name}} + kalavai.job.name: {{deployment_name}} spec: - replicas: {{replicas}} - leaderWorkerTemplate: - size: {{num_workers}} - restartPolicy: RecreateGroupOnPodRestart - leaderTemplate: + queue: default + schedulerName: volcano + plugins: + env: [] + svc: [] + policies: + - event: PodEvicted # Restart the job when a pod is evicted. + action: RestartJob + tasks: + - replicas: 1 # One ps pod specified + name: ps + template: # Definition of the ps pod metadata: annotations: # must have these annotations @@ -18,11 +25,25 @@ spec: {{use_gputype}} labels: role: leader - kalavai.lws.name: {{deployment_name}} + kalavai.job.name: {{deployment_name}} spec: runtimeClassName: nvidia containers: - - name: vllm-leader + - command: + - sh + - -c + - | + RAY_BACKEND_LOG_LEVEL=error /home/ray/workspace/ray_init.sh leader --ray_cluster_size=$(({{remote_workers}}+1)) --ray_object_store_memory={{shmem_size}}; + sleep 30; + nvidia-smi; + ray status; + /home/ray/workspace/run_model.sh \ + --model_id={{model_id}} \ + --extra='{{extra}}' \ + --tensor_parallel_size={{tensor_parallel_size}} \ + --pipeline_parallel_size={{pipeline_parallel_size}}; + sleep 30 + name: vllm-leader image: docker.io/bundenth/ray-vllm:v1.1.4 env: - name: HF_TOKEN @@ -31,36 +52,23 @@ spec: value: /home/ray/cache - name: TMPDIR value: /home/ray/cache/tmp - command: - - sh - - -c - - "/home/ray/workspace/ray_init.sh leader --ray_cluster_size={{num_workers}} --ray_object_store_memory={{shmem_size}}; - sleep 30; - nvidia-smi; - ray status; - /home/ray/workspace/run_model.sh \ - --model_id={{model_id}} \ - --extra='{{extra}}' \ - --tensor_parallel_size={{tensor_parallel_size}} \ - --pipeline_parallel_size={{pipeline_parallel_size}}; - sleep 30" - resources: - requests: - cpu: "{{cpus}}" - memory: {{memory}}Gi - nvidia.com/gpu: "{{gpus}}" - limits: - cpu: "{{cpus}}" - memory: {{memory}}Gi - nvidia.com/gpu: "{{gpus}}" ports: - # if use 8080 as exposed port (if required) - containerPort: 8080 + name: model-port readinessProbe: tcpSocket: port: 8080 initialDelaySeconds: 90 periodSeconds: 30 + resources: + requests: + cpu: {{cpus}} + memory: {{memory}}Gi + nvidia.com/gpu: {{gpus}} + limits: + cpu: {{cpus}} + memory: {{memory}}Gi + nvidia.com/gpu: {{gpus}} volumeMounts: - mountPath: /dev/shm name: dshm @@ -73,18 +81,34 @@ spec: sizeLimit: {{shmem_size}} - name: cache persistentVolumeClaim: - claimName: {{storage}} - workerTemplate: + claimName: pool-cache + restartPolicy: Never + - replicas: {{remote_workers}} + name: worker + policies: + - event: TaskCompleted # The job will be marked as completed when two worker pods finish tasks. 
+ action: CompleteJob + template: # Definition of worker pods metadata: annotations: # must have these annotations {{nouse_gputype}} {{use_gputype}} + labels: + kalavai.job.name: {{deployment_name}} spec: runtimeClassName: nvidia containers: - name: vllm-worker image: docker.io/bundenth/ray-vllm:v1.1.4 + command: + - sh + - -c + - | + PS_HOST=`head /etc/volcano/ps.host`; + WORKER_HOST=`cat /etc/volcano/worker.host | sed 's/$/&/g' | sed 's/^/"/;s/$/"/' | tr "\n" ","`; + nvidia-smi; + RAY_BACKEND_LOG_LEVEL=error /home/ray/workspace/ray_init.sh worker --ray_address=$PS_HOST --ray_port=6379 --ray_object_store_memory={{shmem_size}} --ray_block=1 env: - name: HF_TOKEN value: {{hf_token}} @@ -92,20 +116,15 @@ spec: value: /home/ray/cache - name: TMPDIR value: /home/ray/cache/tmp - command: - - sh - - -c - - "nvidia-smi; - RAY_BACKEND_LOG_LEVEL=error /home/ray/workspace/ray_init.sh worker --ray_address=$LWS_LEADER_ADDRESS --ray_object_store_memory={{shmem_size}} --ray_block=1" resources: requests: - cpu: "{{cpus}}" + cpu: {{cpus}} memory: {{memory}}Gi - nvidia.com/gpu: "{{gpus}}" + nvidia.com/gpu: {{gpus}} limits: - cpu: "{{cpus}}" + cpu: {{cpus}} memory: {{memory}}Gi - nvidia.com/gpu: "{{gpus}}" + nvidia.com/gpu: {{gpus}} volumeMounts: - mountPath: /dev/shm name: dshm @@ -113,4 +132,5 @@ spec: - name: dshm emptyDir: medium: Memory - sizeLimit: {{shmem_size}} \ No newline at end of file + sizeLimit: {{shmem_size}} + restartPolicy: Never diff --git a/templates/vllm/values.yaml b/templates/vllm/values.yaml index 4e45e37..dbd1bd4 100644 --- a/templates/vllm/values.yaml +++ b/templates/vllm/values.yaml @@ -8,15 +8,10 @@ default: "pool-cache" description: "Pool storage to use to cache model weights" -- name: replicas - value: "1" - default: "1" - description: "How many replicas to deploy for the model" - -- name: num_workers - value: "1" - default: "1" - description: "Workers per deployment (for tensor parallelism)" +- name: remote_workers + value: "0" + default: "0" + description: "Number of remote workers (for tensor and pipeline parallelism). This is in addition to the main node" - name: model_id value: null @@ -38,14 +33,9 @@ default: "1" description: "GPUs per single worker (final one = gpus * num_workers)" -- name: gpu_vram - value: "4000" - default: "4000" - description: "vRAM per GPU (total one = num_workers * gpus * gpu_vram)" - - name: memory - value: "4Gi" - default: "4Gi" + value: "4" + default: "4" description: "RAM memory per single worker (final one = memory * num_workers)" - name: tensor_parallel_size
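
For reviewers, a minimal sketch of how the reworked template flow is meant to be used. It assumes Jinja2 (which the `{{...}}` placeholders and the `.render()` call in `populate_template` imply); `render_job` and the example values are illustrative helpers, not part of this change:

    import yaml
    from jinja2 import Template

    def render_job(template_path, values, defaults_path, force_defaults=True):
        """Mirror of load_template: merge defaults from a values.yaml, then render."""
        with open(template_path) as f:
            template_str = f.read()
        with open(defaults_path) as f:
            defaults = yaml.safe_load(f)  # list of {name, value, default, description}
        for entry in defaults:
            # with force_defaults=True, caller-supplied values win and defaults only
            # fill the gaps; with False, defaults overwrite whatever was passed in
            if not force_defaults or entry["name"] not in values:
                values[entry["name"]] = entry["default"]
        return Template(template_str).render(values)

    rendered = render_job(
        "templates/vllm/template.yaml",
        values={"deployment_name": "qwen-test", "remote_workers": "2"},
        defaults_path="templates/vllm/values.yaml",
    )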
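
Continuing the sketch above, a quick sanity check that a rendered manifest carries the pieces the CLI now keys on: the batch.volcano.sh/v1alpha1 Job kind and the kalavai.job.name label that job__list, job__logs and job__manifest filter by:

    manifest = yaml.safe_load(rendered)
    assert manifest["apiVersion"] == "batch.volcano.sh/v1alpha1"
    assert manifest["kind"] == "Job"
    assert manifest["metadata"]["labels"]["kalavai.job.name"] == "qwen-test"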
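
The CLI itself only ships the new CRD coordinates to the watcher, but for context this is roughly the equivalent direct query against the cluster after the LeaderWorkerSet to Volcano migration; the namespace and kubeconfig handling here are assumptions, not part of this diff:

    from kubernetes import client, config

    def list_kalavai_jobs(namespace="default"):
        config.load_kube_config()  # inside the pool this would be load_incluster_config()
        api = client.CustomObjectsApi()
        jobs = api.list_namespaced_custom_object(
            group="batch.volcano.sh",
            version="v1alpha1",
            namespace=namespace,
            plural="jobs",
            label_selector="kalavai.job.name",  # every kalavai-deployed job carries this label
        )
        # job__list now takes the deployment name from the label rather than metadata.name
        return [item["metadata"]["labels"]["kalavai.job.name"] for item in jobs["items"]]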