diff --git a/.github/workflows/largemodel_unit_test_CI.yml b/.github/workflows/largemodel_unit_test_CI.yml
index e00a3a68c..feb726ce1 100644
--- a/.github/workflows/largemodel_unit_test_CI.yml
+++ b/.github/workflows/largemodel_unit_test_CI.yml
@@ -113,41 +113,7 @@ jobs:
mvn clean package
- name: Start Vespa
- run: |
- # Define these for checking if Vespa is ready
- export VESPA_CONFIG_URL=http://localhost:19071
- export VESPA_DOCUMENT_URL=http://localhost:8080
- export VESPA_QUERY_URL=http://localhost:8080
-
-
- cd marqo/scripts/vespa_local
- set -x
- python vespa_local.py start
- set +x
-
- echo "Waiting for Vespa to start"
- for i in {1..20}; do
- echo -ne "Waiting... $i seconds\r"
- sleep 1
- done
- echo -e "\nDone waiting."
-
- # Zip up schemas and services
- sudo apt-get install zip -y
- zip -r vespa_tester_app.zip services.xml schemas
-
- # Deploy application with test schema
- curl --header "Content-Type:application/zip" --data-binary @vespa_tester_app.zip http://localhost:19071/application/v2/tenant/default/prepareandactivate
-
- # wait for vespa to start (document url):
- timeout 10m bash -c 'until curl -f -X GET $VESPA_DOCUMENT_URL >/dev/null 2>&1; do echo " Waiting for Vespa document API to be available..."; sleep 10; done;' || \
- (echo "Vespa (Document URL) did not start in time" && exit 1)
-
- echo "Vespa document API is available. Local Vespa setup complete."
-
- # Delete the zip file
- rm vespa_tester_app.zip
- echo "Deleted vespa_tester_app.zip"
+ run: python marqo/scripts/vespa_local/vespa_local.py full-start --Shards ${{ inputs.number_of_shards || 1 }} --Replicas ${{ inputs.number_of_replicas || 0 }}
- name: Run Large Model Unit Tests
run: |
diff --git a/.github/workflows/unit_test_200gb_CI.yml b/.github/workflows/unit_test_200gb_CI.yml
index eaefab8bc..cc64b7dd7 100644
--- a/.github/workflows/unit_test_200gb_CI.yml
+++ b/.github/workflows/unit_test_200gb_CI.yml
@@ -1,9 +1,36 @@
name: unit_test_200gb_CI
+run-name: Unit Tests with ${{ inputs.number_of_shards || 1 }} shards and ${{ inputs.number_of_replicas || 0 }} replicas
# runs unit tests on AMD64 machine
on:
workflow_call:
+ inputs:
+ number_of_shards:
+ type: number
+ description: 'Number of shards (content nodes per group in Vespa). Minimum of 1.'
+ required: true
+ default: 1
+
+ number_of_replicas:
+ type: number
+ description: 'Number of replicas (groups in Vespa minus 1). Minimum of 0.'
+ required: true
+ default: 0
+
workflow_dispatch:
+ inputs:
+ number_of_shards:
+ type: number
+ description: 'Number of shards (content nodes per group in Vespa). Minimum of 1.'
+ required: true
+ default: 1
+
+ number_of_replicas:
+ type: number
+      description: 'Number of replicas (groups in Vespa minus 1). Minimum of 0.'
+ required: true
+ default: 0
+
push:
branches:
- mainline
@@ -16,7 +43,7 @@ on:
- releases/*
concurrency:
- group: integ-tests-${{ github.ref }}
+  group: unit-tests-${{ github.ref }}-${{ inputs.number_of_shards || 1 }}-${{ inputs.number_of_replicas || 0 }}
cancel-in-progress: true
permissions:
@@ -39,9 +66,9 @@ jobs:
run: |
cd marqo
set -x
-
+
# Determine BASE_COMMIT and HEAD_COMMIT based on the event type
- if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then
+ if [[ "${GITHUB_EVENT_NAME}" == "pull_request" || "${GITHUB_EVENT_NAME}" == "pull_request_review" ]]; then
BASE_COMMIT=${{ github.event.pull_request.base.sha }}
HEAD_COMMIT=${{ github.event.pull_request.head.sha }}
elif [[ "${GITHUB_EVENT_NAME}" == "push" ]]; then
@@ -70,11 +97,46 @@ jobs:
echo "doc_only=true" >> $GITHUB_OUTPUT
fi
- Start-Runner:
- name: Start self-hosted EC2 runner
+ Determine-Vespa-Setup:
+ needs:
+ - Check-Changes
runs-on: ubuntu-latest
+ if: ${{ needs.Check-Changes.outputs.doc_only == 'false' }} # Run only if there are non-documentation changes
+ outputs:
+ VESPA_MULTINODE_SETUP: ${{ steps.set_var.outputs.VESPA_MULTINODE_SETUP }}
+ MULTINODE_TEST_ARGS: ${{ steps.set_var.outputs.MULTINODE_TEST_ARGS }}
+ steps:
+ - name: Determine VESPA_MULTINODE_SETUP
+ id: set_var
+ run: |
+ # For single node, initialize as false
+ echo "VESPA_MULTINODE_SETUP=false" >> $GITHUB_OUTPUT
+ # Only enforce coverage check if single node
+ echo "MULTINODE_TEST_ARGS=--cov-fail-under=69" >> $GITHUB_OUTPUT
+ echo "First assuming single node Vespa setup."
+
+ # Extract inputs safely, defaulting to 1 (for shards), 0 (for replicas) if not present
+ NUMBER_OF_SHARDS="${{ inputs.number_of_shards || 1 }}"
+ NUMBER_OF_REPLICAS="${{ inputs.number_of_replicas || 0 }}"
+
+ # Convert inputs to integers
+ NUMBER_OF_SHARDS_INT=$(echo "$NUMBER_OF_SHARDS" | awk '{print int($0)}')
+ NUMBER_OF_REPLICAS_INT=$(echo "$NUMBER_OF_REPLICAS" | awk '{print int($0)}')
+
+ # Evaluate the conditions
+ if [[ "$NUMBER_OF_SHARDS_INT" -gt 1 || "$NUMBER_OF_REPLICAS_INT" -gt 0 ]]; then
+ echo "Now using multi-node Vespa setup. Shards are $NUMBER_OF_SHARDS_INT and replicas are $NUMBER_OF_REPLICAS_INT."
+ echo "VESPA_MULTINODE_SETUP=true" >> $GITHUB_OUTPUT
+ # If multinode vespa, ignore unrelated tests to save time and prevent errors
+ echo "MULTINODE_TEST_ARGS=--multinode --ignore=tests/integ_tests/core/index_management/test_index_management.py --ignore=tests/integ_tests/core/inference --ignore=tests/integ_tests/processing --ignore=tests/integ_tests/s2_inference" >> $GITHUB_OUTPUT
+ fi
+
+ Start-Runner:
needs:
+ - Determine-Vespa-Setup
- Check-Changes
+ name: Start self-hosted EC2 runner
+ runs-on: ubuntu-latest
if: ${{ needs.Check-Changes.outputs.doc_only == 'false' }} # Run only if there are non-documentation changes
outputs:
label: ${{ steps.start-ec2-runner.outputs.label }}
@@ -93,7 +155,8 @@ jobs:
mode: start
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
ec2-image-id: ${{ vars.MARQO_CPU_AMD64_TESTS_INSTANCE_AMI }}
- ec2-instance-type: m6i.xlarge
+ # m6i.xlarge if single node vespa, but m6i.2xlarge if multinode vespa
+ ec2-instance-type: ${{ needs.Determine-Vespa-Setup.outputs.VESPA_MULTINODE_SETUP == 'true' && 'm6i.2xlarge' || 'm6i.xlarge' }}
subnet-id: ${{ secrets.MARQO_WORKFLOW_TESTS_SUBNET_ID }}
security-group-id: ${{ secrets.MARQO_WORKFLOW_TESTS_SECURITY_GROUP_ID }}
aws-resource-tags: > # optional, requires additional permissions
@@ -111,9 +174,13 @@ jobs:
needs:
- Check-Changes # required to start the main job when the runner is ready
- Start-Runner # required to get output from the start-runner job
+ - Determine-Vespa-Setup
if: ${{ needs.Check-Changes.outputs.doc_only == 'false' }} # Run only if there are non-documentation changes
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
environment: marqo-test-suite
+ env:
+ VESPA_MULTINODE_SETUP: ${{ needs.Determine-Vespa-Setup.outputs.VESPA_MULTINODE_SETUP }}
+ MULTINODE_TEST_ARGS: ${{ needs.Determine-Vespa-Setup.outputs.MULTINODE_TEST_ARGS }}
steps:
- name: Checkout marqo repo
uses: actions/checkout@v3
@@ -171,7 +238,7 @@ jobs:
mvn clean package
- name: Start Vespa
- run: python marqo/tests/api_tests/v1/scripts/start_vespa.py
+ run: python marqo/scripts/vespa_local/vespa_local.py full-start --Shards ${{ inputs.number_of_shards || 1 }} --Replicas ${{ inputs.number_of_replicas || 0 }}
- name: Run Unit Tests
run: |
@@ -186,10 +253,10 @@ jobs:
cd marqo
- export PYTHONPATH="./src"
+ export PYTHONPATH="./src:."
set -o pipefail
- pytest --ignore=tests/integ_tests/test_documentation.py \
- --durations=100 --cov=src --cov-branch --cov-context=test --cov-fail-under=69 \
+ pytest ${{ env.MULTINODE_TEST_ARGS }} --ignore=tests/integ_tests/test_documentation.py \
+ --durations=100 --cov=src --cov-branch --cov-context=test \
--cov-report=html:cov_html --cov-report=xml:cov.xml --cov-report term:skip-covered \
--md-report --md-report-flavor gfm --md-report-output pytest_result_summary.md \
tests/integ_tests/ | tee pytest_output.txt
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index 6cbb8265b..933ffeba2 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -52,7 +52,7 @@ jobs:
- name: Run Unit Tests
run: |
cd marqo
- export PYTHONPATH="./src"
+ export PYTHONPATH="./src:."
pytest tests/unit_tests/ --durations=100 --cov=src --cov-branch --cov-context=test --cov-report=html:cov_html --cov-report=lcov:lcov.info
- name: Upload Test Report
diff --git a/.github/workflows/unit_tests_with_shards_and_replicas.yml b/.github/workflows/unit_tests_with_shards_and_replicas.yml
new file mode 100644
index 000000000..6b1f4b273
--- /dev/null
+++ b/.github/workflows/unit_tests_with_shards_and_replicas.yml
@@ -0,0 +1,52 @@
+# Runs unit tests on 4 cases:
+# 1. single node vespa
+# 2. multinode vespa: 1 shard, 1 replica
+# 3. multinode vespa: 2 shards, 0 replicas
+# 4. multinode vespa: 2 shards, 1 replica
+# Runs only once on PR approval
+
+name: Unit Tests with Shards and Replicas
+
+on:
+ workflow_dispatch:
+  pull_request_review:
+    types: [submitted]
+    # NOTE(review): GitHub Actions does not support `branches` filtering on the
+    # pull_request_review event — this filter is silently ignored. Base-branch
+    # gating must be enforced with a job-level `if` condition instead; verify.
+    branches:
+      - mainline
+      - 'releases/*'
+
+permissions:
+ contents: read
+
+jobs:
+ Unit-Tests-1-Shard-0-Replica:
+ uses: ./.github/workflows/unit_test_200gb_CI.yml
+ secrets: inherit
+ if: github.event_name == 'workflow_dispatch' || github.event.review.state == 'approved'
+ with:
+ number_of_shards: 1
+ number_of_replicas: 0
+
+ Unit-Tests-1-Shard-1-Replica:
+ uses: ./.github/workflows/unit_test_200gb_CI.yml
+ secrets: inherit
+ if: github.event_name == 'workflow_dispatch' || github.event.review.state == 'approved'
+ with:
+ number_of_shards: 1
+ number_of_replicas: 1
+
+ Unit-Tests-2-Shard-0-Replica:
+ uses: ./.github/workflows/unit_test_200gb_CI.yml
+ secrets: inherit
+ if: github.event_name == 'workflow_dispatch' || github.event.review.state == 'approved'
+ with:
+ number_of_shards: 2
+ number_of_replicas: 0
+
+ Unit-Tests-2-Shard-1-Replica:
+ uses: ./.github/workflows/unit_test_200gb_CI.yml
+ secrets: inherit
+ if: github.event_name == 'workflow_dispatch' || github.event.review.state == 'approved'
+ with:
+ number_of_shards: 2
+ number_of_replicas: 1
diff --git a/.gitignore b/.gitignore
index f1e4c26a1..82dc3ecf0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -149,5 +149,15 @@ dump.rdb
.DS_Store
+# Local vespa artifacts
# Tester app for unit tests
-scripts/vespa_local/vespa_tester_app.zip
\ No newline at end of file
+scripts/vespa_local/vespa_tester_app.zip
+
+# Dynamically generated files for multinode vespa
+scripts/vespa_local/docker-compose.yml
+scripts/vespa_local/services.xml
+scripts/vespa_local/hosts.xml
+
+scripts/vespa_local/multinode/docker-compose.yml
+scripts/vespa_local/multinode/services.xml
+scripts/vespa_local/multinode/hosts.xml
\ No newline at end of file
diff --git a/requirements.dev.txt b/requirements.dev.txt
index bba40938a..daf9eee22 100644
--- a/requirements.dev.txt
+++ b/requirements.dev.txt
@@ -4,4 +4,8 @@ pytest==8.3.4
pytest-cov==6.0.0
diff-cover==9.2.0
pytest-md-report==0.6.2
-pytest-asyncio==0.23.8
\ No newline at end of file
+pytest-asyncio==0.23.8
+
+# For vespa_local setup
+docker==7.1.0
+PyYAML==6.0.2
\ No newline at end of file
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/scripts/vespa_local/README.md b/scripts/vespa_local/README.md
new file mode 100644
index 000000000..851b5c8cd
--- /dev/null
+++ b/scripts/vespa_local/README.md
@@ -0,0 +1,58 @@
+# Setting up Vespa locally
+When running Marqo or the unit test suite locally, a Vespa node or cluster needs to be running. To assist with this,
+this directory comes with scripts to set up either a single node (1 container) or multinode-HA Vespa on your machine.
+
+### Set Vespa version
+- By default, this script will use Vespa 8.472.109, as defined in `vespa_local.py`. To change it, set the `VESPA_VERSION`
+variable to the desired version. For example:
+```commandline
+export VESPA_VERSION="latest"
+```
+## Single Node Vespa (default & recommended)
+- Runs 1 Vespa container on your machine. This serves as the config, api, and content node.
+- This is equivalent to running Vespa with 0 replicas and 1 shard.
+- Start with this command:
+```commandline
+python vespa_local.py start
+```
+- This will run the Vespa docker container then copy the `services.xml` file from the `singlenode/` directory to
+this directory. This will be bundled into the Vespa application upon deployment.
+
+## Multi-node Vespa
+- Runs a Vespa cluster with the following nodes:
+ - 3 config nodes
+ - `m` content nodes, where `m` is `number_of_shards * (1 + number_of_replicas)`
+  - `n` API nodes, where `n` is `max(2, ceil(number_of_content_nodes / 4))`
+- For example, with 2 shards and 1 replica, it will run 4 content nodes and 2 API nodes.
+- Start with this command:
+```commandline
+python vespa_local.py start --Shards 2 --Replicas 1
+```
+
+## Deployment
+- After starting the Vespa node(s), you can deploy the Vespa application with the files in this directory using:
+```commandline
+python vespa_local.py deploy-config
+```
+- For single node, you can check for readiness using:
+```
+curl -s http://localhost:19071/state/v1/health
+```
+- For multi-node, the start script will output a list of URLs corresponding to the API and content nodes.
+You can curl each one to check for readiness.
+
+## Other Commands
+### Stop Vespa
+```commandline
+python vespa_local.py stop
+```
+### Restart Vespa
+```commandline
+python vespa_local.py restart
+```
+
+## Notes
+- When running other commands in this script (stop, restart), it will check for the presence of a container named
+`vespa`, and will assume setup is single node if it finds one. If not, it will assume setup is multi-node.
+- For multi-node, expect config and API nodes to take ~1gb of memory, while content nodes take ~500mb each. Adjust your
+resource allotment accordingly.
\ No newline at end of file
diff --git a/scripts/vespa_local/__init__.py b/scripts/vespa_local/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/scripts/vespa_local/schemas/test_vespa_client.sd b/scripts/vespa_local/schemas/test_vespa_client.sd
deleted file mode 100644
index 2cfca4ee3..000000000
--- a/scripts/vespa_local/schemas/test_vespa_client.sd
+++ /dev/null
@@ -1,34 +0,0 @@
-schema test_vespa_client {
-
- document test_vespa_client {
- field marqo__id type string {
- indexing: summary | attribute
- }
-
- field id type string {
- indexing: summary | attribute
- }
-
- field title type string {
- indexing: summary | attribute | index
- index: enable-bm25
- }
-
- field contents type string {
- indexing: summary | attribute | index
- index: enable-bm25
- }
-
- }
-
- fieldset default {
- fields: title, contents
- }
-
- rank-profile bm25 inherits default {
- first-phase {
- expression: bm25(title) + bm25(contents)
- }
- }
-
-}
diff --git a/scripts/vespa_local/vespa_local.py b/scripts/vespa_local/vespa_local.py
index e5004d559..bbc2432d1 100644
--- a/scripts/vespa_local/vespa_local.py
+++ b/scripts/vespa_local/vespa_local.py
@@ -1,49 +1,678 @@
-import argparse
+"""This script pulls/starts the Vespa docker image and deploys a dummy application package.
+To run everything (single node), use `python vespa_local.py full-start`.
+To run everything (multi node), use `python vespa_local.py full-start --Shards 2 --Replicas 1`.
+
+It can be used in Marqo local runs to start Vespa outside the Marqo docker container. This requires
+that the host machine has docker installed.
+
+We generate a schema.sd file and a services.xml file and put them in a zip file. We then deploy the zip file
+using the REST API. After that, we check if Vespa is up and running. If it is, we can start Marqo.
+
+All the files are created in a directory called vespa_dummy_application_package. This directory is removed and
+the zip file is removed after the application package is deployed.
+
+Note: Vespa CLI is not needed for full-start as we use the REST API to deploy the application package.
+"""
import os
+import shutil
+import subprocess
+import textwrap
+import time
+import sys
+import yaml
+import docker
+import xml.etree.ElementTree as ET
+from xml.dom import minidom
+import math
+
+import requests
+import argparse
+
+VESPA_VERSION = os.getenv('VESPA_VERSION', '8.472.109')
+VESPA_CONFIG_URL="http://localhost:19071"
+VESPA_DOCUMENT_URL="http://localhost:8080"
+VESPA_QUERY_URL="http://localhost:8080"
+MINIMUM_API_NODES = 2
+
+
+class VespaLocal:
+ # Base directory for the application package
+ base_dir = "vespa_dummy_application_package"
+ subdirs = ["schemas"]
+
+ def get_test_vespa_client_schema_content(self) -> str:
+ """
+ Get the content for the test_vespa_client.sd file. Should be the same for single and multi node vespa.
+ """
+ return textwrap.dedent("""
+ schema test_vespa_client {
+ document test_vespa_client {
+
+ field id type string {
+ indexing: summary | attribute
+ }
+
+ field title type string {
+ indexing: summary | attribute | index
+ index: enable-bm25
+ }
+
+ field contents type string {
+ indexing: summary | attribute | index
+ index: enable-bm25
+ }
+
+ }
+
+ fieldset default {
+ fields: title, contents
+ }
+
+ rank-profile bm25 inherits default {
+ first-phase {
+ expression: bm25(title) + bm25(contents)
+ }
+ }
+ }
+ """)
+
+ def generate_application_package_files(self):
+ """
+ Generate files to be zipped for application package
+ """
+
+ # Create the directories and files, and write content
+ os.makedirs(self.base_dir, exist_ok=True)
+ for subdir in self.subdirs:
+ os.makedirs(os.path.join(self.base_dir, subdir), exist_ok=True)
+ for file in self.application_package_files[subdir]:
+ file_path = os.path.join(self.base_dir, subdir, file)
+ with open(file_path, 'w') as f:
+ if file == "test_vespa_client.sd":
+ content_for_test_vespa_client_sd = self.get_test_vespa_client_schema_content()
+ f.write(content_for_test_vespa_client_sd)
+ for file in self.application_package_files[""]:
+ file_path = os.path.join(self.base_dir, file)
+ with open(file_path, 'w') as f:
+ if file == "services.xml":
+ content_for_services_xml = self.get_services_xml_content()
+ f.write(content_for_services_xml)
+ if file == "hosts.xml":
+ # This will only happen for multinode
+ content_for_hosts_xml = self.get_hosts_xml_content()
+ f.write(content_for_hosts_xml)
+
+ def generate_application_package(self) -> str:
+ # Build application package directory
+ self.generate_application_package_files()
+
+ # Zip up files
+ os.chdir(self.base_dir)
+ shutil.make_archive('../' + self.base_dir, 'zip', ".")
+ os.chdir("..")
+ zip_file_path = f"{self.base_dir}.zip"
+
+ if os.path.isfile(zip_file_path):
+ print(f"Zip file created successfully: {zip_file_path}")
+ # Remove the base directory
+ shutil.rmtree(self.base_dir)
+ print(f"Directory {self.base_dir} removed.")
+ return zip_file_path
+ else:
+ print("Failed to create the zip file.")
+
+class VespaLocalSingleNode(VespaLocal):
+
+ def __init__(self):
+ self.application_package_files = {
+ "schemas": ["test_vespa_client.sd"],
+ "": ["services.xml"]
+ }
+ print("Creating single node Vespa setup.")
+
+ def start(self):
+ os.system("docker rm -f vespa 2>/dev/null || true")
+ os.system("docker run --detach "
+ "--name vespa "
+ "--hostname vespa-container "
+ "--publish 8080:8080 --publish 19071:19071 --publish 2181:2181 --publish 127.0.0.1:5005:5005 "
+ f"vespaengine/vespa:{VESPA_VERSION}")
+
+ def get_services_xml_content(self) -> str:
+        # NOTE(review): the XML below was reconstructed — the original tags were
+        # lost in extraction (only the redundancy value "2" survived). Verify
+        # against the repository before applying.
+        return textwrap.dedent(
+            """<?xml version="1.0" encoding="utf-8" ?>
+            <services version="1.0" xmlns:deploy="vespa" xmlns:preprocess="properties">
+                <container id="default" version="1.0">
+                    <document-api/>
+                    <search/>
+                    <nodes>
+                        <node hostalias="node1"/>
+                    </nodes>
+                </container>
+                <content id="content_default" version="1.0">
+                    <redundancy>2</redundancy>
+                    <documents>
+                        <document type="test_vespa_client" mode="index"/>
+                    </documents>
+                    <nodes>
+                        <node hostalias="node1" distribution-key="0"/>
+                    </nodes>
+                </content>
+            </services>
+            """)
+
+ def wait_vespa_running(self, max_wait_time: int = 60):
+ start_time = time.time()
+ # Check if the single vespa container is running
+ while True:
+ if time.time() - start_time > max_wait_time:
+ print("Maximum wait time exceeded. Vespa container may not be running.")
+ break
+
+ try:
+ output = subprocess.check_output(["docker", "inspect", "--format", "{{.State.Status}}", "vespa"])
+ if output.decode().strip() == "running":
+ print("Vespa container is up and running.")
+ break
+ except subprocess.CalledProcessError:
+ pass
+
+ print("Waiting for Vespa container to start...")
+ time.sleep(5)
+
+
+class VespaLocalMultiNode(VespaLocal):
+
+ def __init__(self, number_of_shards, number_of_replicas):
+ self.number_of_shards = number_of_shards
+ self.number_of_replicas = number_of_replicas
+ self.application_package_files = {
+ "schemas": ["test_vespa_client.sd"],
+ "": ["hosts.xml", "services.xml"]
+ }
+ print(f"Creating multi-node Vespa setup with {number_of_shards} shards and {number_of_replicas} replicas.")
+
+ def generate_docker_compose(self, vespa_version: str):
+ """
+ Create docker compose file for multinode vespa with 3 config nodes.
+ Generates (number_of_replicas + 1) * number_of shards content nodes.
+ """
+ services = {}
+
+ print(
+ f"Creating `docker-compose.yml` with {self.number_of_shards} shards and {self.number_of_replicas} replicas.")
+
+ BASE_CONFIG_PORT_A = 19071 # configserver (deploy here)
+ BASE_SLOBROK_PORT = 19100 # slobrok
+ BASE_CLUSTER_CONTROLLER_PORT = 19050 # cluster-controller
+ BASE_ZOOKEEPER_PORT = 2181 # zookeeper
+ BASE_METRICS_PROXY_PORT = 20092 # metrics-proxy (every node has it)
+
+ BASE_API_PORT_A = 8080 # document/query API
+ BASE_DEBUG_PORT = 5005 # debugging port
+
+ BASE_CONTENT_PORT_A = 19107
+
+ TOTAL_CONTENT_NODES = (self.number_of_replicas + 1) * self.number_of_shards
+ TOTAL_API_NODES = max(MINIMUM_API_NODES, math.ceil(TOTAL_CONTENT_NODES / 4))
+ print(f"Total content nodes: {TOTAL_CONTENT_NODES}, Total API nodes: {TOTAL_API_NODES}")
+
+ # Config Nodes (3)
+ nodes_created = 0
+ urls_to_health_check = [] # List all API and content node URLs here
+ TOTAL_CONFIG_NODES = 3
+ for config_node in range(TOTAL_CONFIG_NODES):
+ services[f'config-{config_node}'] = {
+ 'image': f"vespaengine/vespa:{vespa_version or 'latest'}",
+ 'container_name': f'config-{config_node}',
+ 'hostname': f'config-{config_node}.vespanet',
+ 'environment': {
+ 'VESPA_CONFIGSERVERS': 'config-0.vespanet,config-1.vespanet,config-2.vespanet',
+ 'VESPA_CONFIGSERVER_JVMARGS': '-Xms32M -Xmx128M',
+ 'VESPA_CONFIGPROXY_JVMARGS': '-Xms32M -Xmx128M'
+ },
+ 'networks': [
+ 'vespanet'
+ ],
+ 'ports': [
+ f'{BASE_CONFIG_PORT_A + config_node}:19071',
+ f'{BASE_SLOBROK_PORT + config_node}:19100',
+ f'{BASE_CLUSTER_CONTROLLER_PORT + config_node}:19050',
+ f'{BASE_ZOOKEEPER_PORT + config_node}:2181',
+ f'{BASE_METRICS_PROXY_PORT + nodes_created}:19092'
+ ],
+ 'command': 'configserver,services',
+ 'healthcheck': {
+ 'test': "curl http://localhost:19071/state/v1/health",
+ 'timeout': '10s',
+ 'retries': 3,
+ 'start_period': '40s'
+ }
+ }
+ # Add additional ports to adminserver
+ if config_node == 0:
+ services[f'config-{config_node}']['ports'].append('19098:19098')
+
+ nodes_created += 1
+
+ # API Nodes
+ for api_node in range(TOTAL_API_NODES):
+ services[f'api-{api_node}'] = {
+ 'image': f"vespaengine/vespa:{vespa_version or 'latest'}",
+ 'container_name': f'api-{api_node}',
+ 'hostname': f'api-{api_node}.vespanet',
+ 'environment': [
+ 'VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet'
+ ],
+ 'networks': [
+ 'vespanet'
+ ],
+ 'ports': [
+ f'{BASE_API_PORT_A + api_node}:8080',
+ f'{BASE_DEBUG_PORT + api_node}:5005',
+ f'{BASE_METRICS_PROXY_PORT + nodes_created}:19092'
+ ],
+ 'command': 'services',
+ 'depends_on': {
+ 'config-0': {'condition': 'service_healthy'},
+ 'config-1': {'condition': 'service_healthy'},
+ 'config-2': {'condition': 'service_healthy'}
+ }
+ }
+ urls_to_health_check.append(f"http://localhost:{BASE_API_PORT_A + api_node}/state/v1/health")
+ nodes_created += 1
+
+ # Content Nodes
+ i = 0 # counter of content nodes generated
+ for group in range(self.number_of_replicas + 1):
+ for shard in range(self.number_of_shards):
+ node_name = f'content-{group}-{shard}'
+ host_ports = [
+ f'{BASE_CONTENT_PORT_A + i}:19107',
+ f'{BASE_METRICS_PROXY_PORT + nodes_created}:19092'
+ ]
+ services[node_name] = {
+ 'image': f'vespaengine/vespa:{vespa_version or "latest"}',
+ 'container_name': node_name,
+ 'hostname': f'{node_name}.vespanet',
+ 'environment': [
+ 'VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet'
+ ],
+ 'networks': [
+ 'vespanet'
+ ],
+ 'ports': host_ports,
+ 'command': 'services',
+ 'depends_on': {
+ 'config-0': {'condition': 'service_healthy'},
+ 'config-1': {'condition': 'service_healthy'},
+ 'config-2': {'condition': 'service_healthy'}
+ }
+ }
+ urls_to_health_check.append(f"http://localhost:{BASE_CONTENT_PORT_A + i}/state/v1/health")
+ i += 1
+ nodes_created += 1
+
+ # Define Networks
+ networks = {
+ 'vespanet': {
+ 'driver': 'bridge'
+ }
+ }
+
+ # Combine into final docker-compose structure
+ docker_compose = {
+ 'services': services,
+ 'networks': networks
+ }
+
+ with open('docker-compose.yml', 'w') as f:
+ yaml.dump(docker_compose, f, sort_keys=False)
+ print(f"Generated `docker-compose.yml` successfully.")
+
+ print("Health check URLs:")
+ for url in urls_to_health_check:
+ print(url)
+
+ def get_services_xml_content(self):
+ """
+ Create services.xml for multinode vespa with 3 config nodes.
+ Generates (number_of_replicas + 1) groups of number_of shards content nodes each.
+ """
+
+ print(f"Writing content for `services.xml` with {self.number_of_shards} shards and {self.number_of_replicas} replicas.")
+ TOTAL_CONTENT_NODES = (self.number_of_replicas + 1) * self.number_of_shards
+ TOTAL_API_NODES = max(MINIMUM_API_NODES, math.ceil(TOTAL_CONTENT_NODES / 4))
+ print(f"Total content nodes: {TOTAL_CONTENT_NODES}, Total API nodes: {TOTAL_API_NODES}")
+
+ # Define the root element with namespaces
+ services = ET.Element('services', {
+ 'version': '1.0',
+ 'xmlns:deploy': 'vespa',
+ 'xmlns:preprocess': 'properties'
+ })
+
+ # Admin Section
+ admin = ET.SubElement(services, 'admin', {'version': '2.0'})
+
+ configservers = ET.SubElement(admin, 'configservers')
+ ET.SubElement(configservers, 'configserver', {'hostalias': 'config-0'})
+ ET.SubElement(configservers, 'configserver', {'hostalias': 'config-1'})
+ ET.SubElement(configservers, 'configserver', {'hostalias': 'config-2'})
+
+ cluster_controllers = ET.SubElement(admin, 'cluster-controllers')
+ ET.SubElement(cluster_controllers, 'cluster-controller', {
+ 'hostalias': 'config-0',
+ 'jvm-options': '-Xms32M -Xmx64M'
+ })
+ ET.SubElement(cluster_controllers, 'cluster-controller', {
+ 'hostalias': 'config-1',
+ 'jvm-options': '-Xms32M -Xmx64M'
+ })
+ ET.SubElement(cluster_controllers, 'cluster-controller', {
+ 'hostalias': 'config-2',
+ 'jvm-options': '-Xms32M -Xmx64M'
+ })
-VESPA_VERSION=os.getenv('VESPA_VERSION', '8.472.109') # Vespa version to use as marqo-base:49
+ slobroks = ET.SubElement(admin, 'slobroks')
+ ET.SubElement(slobroks, 'slobrok', {'hostalias': 'config-0'})
+ ET.SubElement(slobroks, 'slobrok', {'hostalias': 'config-1'})
+ ET.SubElement(slobroks, 'slobrok', {'hostalias': 'config-2'})
+
+ # Note: We only have 1 config node for admin.
+ ET.SubElement(admin, 'adminserver', {'hostalias': 'config-0'})
+
+ # Container Section (API nodes)
+ container = ET.SubElement(services, 'container', {'id': 'default', 'version': '1.0'})
+ ET.SubElement(container, 'document-api')
+ ET.SubElement(container, 'search')
+
+ nodes = ET.SubElement(container, 'nodes')
+ ET.SubElement(nodes, 'jvm', {
+ 'options': '-Xms32M -Xmx256M -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005'
+ })
+ for api_node_number in range(TOTAL_API_NODES):
+ ET.SubElement(nodes, 'node', {'hostalias': f'api-{api_node_number}'})
+
+ # Content Section
+ content = ET.SubElement(services, 'content', {'id': 'content_default', 'version': '1.0'})
+ # Optional: Redundancy can be commented out or adjusted
+ redundancy = ET.SubElement(content, 'redundancy')
+ redundancy.text = str(self.number_of_replicas + 1) # As per Vespa's redundancy calculation
+
+ documents = ET.SubElement(content, 'documents')
+ ET.SubElement(documents, 'document', {
+ 'type': 'test_vespa_client',
+ 'mode': 'index'
+ })
+
+ group_parent = ET.SubElement(content, 'group')
+
+ # Distribution configuration
+ ET.SubElement(group_parent, 'distribution', {'partitions': '1|' * self.number_of_replicas + "*"})
+
+ # Generate Groups and Nodes
+ node_distribution_key = 0
+ for group_number in range(self.number_of_replicas + 1): # +1 for the primary group
+ group = ET.SubElement(group_parent, 'group', {
+ 'name': f'group-{group_number}',
+ 'distribution-key': str(group_number)
+ })
+ for shard_number in range(self.number_of_shards):
+ hostalias = f'content-{group_number}-{shard_number}'
+ ET.SubElement(group, 'node', {
+ 'hostalias': hostalias,
+ 'distribution-key': str(node_distribution_key)
+ })
+ node_distribution_key += 1
+
+ # Convert the ElementTree to a string
+ rough_string = ET.tostring(services, 'utf-8')
+ reparsed = minidom.parseString(rough_string)
+ pretty_xml_bytes = reparsed.toprettyxml(indent=" ", encoding='utf-8')
+ pretty_xml = pretty_xml_bytes.decode('utf-8')
+
+ print("Generated services.xml content successfully!")
+ return pretty_xml
+
+ def get_hosts_xml_content(self):
+ """
+ Create hosts.xml for multinode vespa with 3 config nodes.
+ Generates (number_of_replicas + 1) groups of number_of shards content nodes each.
+ """
+
+ print(f"Writing content for `hosts.xml` with {self.number_of_shards} shards and {self.number_of_replicas} replicas.")
+ TOTAL_CONTENT_NODES = (self.number_of_replicas + 1) * self.number_of_shards
+ TOTAL_API_NODES = max(MINIMUM_API_NODES, math.ceil(TOTAL_CONTENT_NODES / 4))
+ print(f"Total content nodes: {TOTAL_CONTENT_NODES}, Total API nodes: {TOTAL_API_NODES}")
+
+ # Define the root element
+ hosts = ET.Element('hosts')
+
+ # Config Nodes (3)
+ config_0 = ET.SubElement(hosts, 'host', {'name': 'config-0.vespanet'})
+ alias_config_0 = ET.SubElement(config_0, 'alias')
+ alias_config_0.text = 'config-0'
+
+ config_1 = ET.SubElement(hosts, 'host', {'name': 'config-1.vespanet'})
+ alias_config_1 = ET.SubElement(config_1, 'alias')
+ alias_config_1.text = 'config-1'
+
+ config_2 = ET.SubElement(hosts, 'host', {'name': 'config-2.vespanet'})
+ alias_config_2 = ET.SubElement(config_2, 'alias')
+ alias_config_2.text = 'config-2'
+
+ # API Nodes (container)
+ for api_node_number in range(TOTAL_API_NODES):
+ api_node = ET.SubElement(hosts, 'host',
+ {'name': f'api-{api_node_number}.vespanet'})
+ alias_api_node = ET.SubElement(api_node, 'alias')
+ alias_api_node.text = f'api-{api_node_number}'
+
+ # Content Nodes
+ for group_number in range(self.number_of_replicas + 1): # +1 for the primary group
+ for shard_number in range(self.number_of_shards):
+ content_node = ET.SubElement(hosts, 'host',
+ {'name': f'content-{group_number}-{shard_number}.vespanet'})
+ alias_content_node = ET.SubElement(content_node, 'alias')
+ alias_content_node.text = f'content-{group_number}-{shard_number}'
+
+ # Convert the ElementTree to a string
+ rough_string = ET.tostring(hosts, 'utf-8')
+ reparsed = minidom.parseString(rough_string)
+ pretty_xml_bytes = reparsed.toprettyxml(indent=" ", encoding='utf-8')
+ pretty_xml = pretty_xml_bytes.decode('utf-8')
+
+ print("Generated hosts.xml content successfully!")
+ return pretty_xml
+
+ def start(self):
+ # Generate the docker compose file
+ self.generate_docker_compose(
+ vespa_version=VESPA_VERSION
+ )
+
+ # Start the docker compose
+ os.system("docker compose down 2>/dev/null || true")
+ os.system("docker compose up -d")
+
+ def wait_vespa_running(self, max_wait_time: int = 20):
+ # Just wait 20 seconds
+ print(f"Waiting for Vespa to start for {max_wait_time} seconds.")
+ time.sleep(max_wait_time)
+
+
+def container_exists(container_name):
+ client = docker.from_env()
+ try:
+ container = client.containers.get(container_name)
+ return True
+ except docker.errors.NotFound:
+ return False
+ except docker.errors.APIError as e:
+ print(f"Error accessing Docker API: {e}")
+ return False
+
+
+def validate_shards_and_replicas_count(args):
+ if args.Shards < 1:
+ raise ValueError("Number of shards must be at least 1.")
+ if args.Replicas < 0:
+ raise ValueError("Number of replicas must be at least 0.")
+
+
+# Callable functions from workflows or CLI
+# These functions will call the appropriate methods from single/multi node vespa setups.
+def full_start(args):
+ # Create instance of VespaLocal
+ # vespa_local_instance is used for starting vespa & generating application package.
+ validate_shards_and_replicas_count(args)
+ if args.Shards > 1 or args.Replicas > 0:
+ vespa_local_instance = VespaLocalMultiNode(args.Shards, args.Replicas)
+ else:
+ vespa_local_instance = VespaLocalSingleNode()
+
+ # Start Vespa
+ vespa_local_instance.start()
+ # Wait until vespa is up and running
+ vespa_local_instance.wait_vespa_running()
+ # Generate the application package
+ zip_file_path = vespa_local_instance.generate_application_package()
+ # Deploy the application package
+ time.sleep(10)
+ deploy_application_package(zip_file_path)
+ # Check if Vespa is up and running
+ has_vespa_converged()
def start(args):
- os.system("docker rm -f vespa 2>/dev/null || true")
+    # Start Vespa containers only, without deploying an application package (prefer the full-start command, which also deploys)
+ # Create instance of VespaLocal
+ # vespa_local_instance is used for starting vespa & generating application package.
+ validate_shards_and_replicas_count(args)
+ if args.Shards > 1 or args.Replicas > 0:
+ vespa_local_instance = VespaLocalMultiNode(args.Shards, args.Replicas)
+ else:
+ vespa_local_instance = VespaLocalSingleNode()
- os.system("docker run --detach "
- "--name vespa "
- "--hostname vespa-container "
- "--publish 8080:8080 --publish 19071:19071 --publish 2181:2181 --publish 127.0.0.1:5005:5005 "
- f"vespaengine/vespa:{VESPA_VERSION}")
+ vespa_local_instance.start()
def restart(args):
- os.system("docker restart vespa")
+ if container_exists("vespa"):
+ print("Single Node Vespa setup found (container with name 'vespa'). Restarting container.")
+ os.system("docker restart vespa")
+ else:
+ print("Assuming Multi Node Vespa setup. Restarting all containers.")
+ os.system("docker compose restart")
+
+
+def stop(args):
+ if container_exists("vespa"):
+ print("Single Node Vespa setup found (container with name 'vespa'). Stopping container.")
+ os.system("docker stop vespa")
+ else:
+ print("Assuming Multi Node Vespa setup. Stopping and removing all containers.")
+ os.system("docker compose down")
def deploy_config(args):
+ """
+    Deploy the config using the Vespa CLI, assuming this directory contains the Vespa application files.
+ """
os.system('vespa config set target local')
here = os.path.dirname(os.path.abspath(__file__))
os.system(f'vespa deploy "{here}"')
-def stop(args):
- os.system('docker stop vespa')
+def deploy_application_package(zip_file_path: str, max_retries: int = 5, backoff_factor: float = 0.5) -> None:
+ # URL and headers
+ url = f"{VESPA_CONFIG_URL}/application/v2/tenant/default/prepareandactivate"
+ headers = {
+ "Content-Type": "application/zip"
+ }
+
+ # Ensure the zip file exists
+ if not os.path.isfile(zip_file_path):
+ print("Zip file does not exist.")
+ return
+
+ print("Start deploying the application package...")
+
+ # Attempt to send the request with retries
+ for attempt in range(max_retries):
+ try:
+ with open(zip_file_path, 'rb') as zip_file:
+ response = requests.post(url, headers=headers, data=zip_file)
+ print(response.text)
+ break # Success, exit the retry loop
+ except requests.exceptions.RequestException as e:
+ print(f"Attempt {attempt + 1} failed due to a request error: {e}")
+ if attempt < max_retries - 1:
+ # Calculate sleep time using exponential backoff
+ sleep_time = backoff_factor * (2 ** attempt)
+ print(f"Retrying in {sleep_time} seconds...")
+ time.sleep(sleep_time)
+ else:
+ print("Max retries reached. Aborting.")
+ return
+
+ # Cleanup
+ os.remove(zip_file_path)
+ print("Zip file removed.")
+
+
+def has_vespa_converged(waiting_time: int = 600) -> bool:
+ print("Checking if Vespa has converged...")
+ converged = False
+ start_time = time.time()
+ while time.time() - start_time < waiting_time:
+ try:
+ response = requests.get(
+ f"{VESPA_CONFIG_URL}/application/v2/tenant/default/application/default/environment/prod/region/"
+ f"default/instance/default/serviceconverge")
+ data = response.json()
+ if data.get('converged') == True:
+ converged = True
+ break
+ print(" Waiting for Vespa convergence to be true...")
+ except Exception as e:
+ print(f" Error checking convergence: {str(e)}")
+
+ time.sleep(10)
+
+ if not converged:
+ print("Vespa did not converge in time")
+ sys.exit(1)
+
+ print("Vespa application has converged. Vespa setup complete!")
def main():
parser = argparse.ArgumentParser(description="CLI for local Vespa deployment.")
-
subparsers = parser.add_subparsers(title="modes", description="Available modes", help="Deployment modes",
dest='mode')
subparsers.required = True # Ensure that a mode is always specified
- prepare_parser = subparsers.add_parser("start", help="Start local Vespa")
- prepare_parser.set_defaults(func=start)
+ full_start_parser = subparsers.add_parser("full-start",
+ help="Start local Vespa, build package, deploy, and wait for readiness.")
+ full_start_parser.set_defaults(func=full_start)
+ full_start_parser.add_argument('--Shards', help='The number of shards', default=1, type=int)
+ full_start_parser.add_argument('--Replicas', help='The number of replicas', default=0, type=int)
+
+ start_parser = subparsers.add_parser("start",
+ help="Start local Vespa only")
+ start_parser.set_defaults(func=start)
+ start_parser.add_argument('--Shards', help='The number of shards', default=1, type=int)
+ start_parser.add_argument('--Replicas', help='The number of replicas', default=0, type=int)
prepare_parser = subparsers.add_parser("restart", help="Restart existing local Vespa")
prepare_parser.set_defaults(func=restart)
eks_parser = subparsers.add_parser("deploy-config", help="Deploy config")
- eks_parser.set_defaults(func=deploy_config)
+ eks_parser.set_defaults(func=deploy_config) # TODO: Set this to deploy_application_package
clean_parser = subparsers.add_parser("stop", help="Stop local Vespa")
clean_parser.set_defaults(func=stop)
@@ -59,3 +688,5 @@ def main():
if __name__ == "__main__":
main()
+
+
diff --git a/tests/api_tests/v1/requirements.txt b/tests/api_tests/v1/requirements.txt
index ecfdc89c7..a820f2e0b 100644
--- a/tests/api_tests/v1/requirements.txt
+++ b/tests/api_tests/v1/requirements.txt
@@ -2,3 +2,5 @@ pillow
numpy
pytest
flake8
+PyYAML
+docker
\ No newline at end of file
diff --git a/tests/api_tests/v1/scripts/start_local_marqo.sh b/tests/api_tests/v1/scripts/start_local_marqo.sh
index 4a8375384..59baa1241 100644
--- a/tests/api_tests/v1/scripts/start_local_marqo.sh
+++ b/tests/api_tests/v1/scripts/start_local_marqo.sh
@@ -7,8 +7,8 @@ set -e
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-python3 "$SCRIPT_DIR/start_vespa.py"
-
+# Start single-node Vespa (full-start with defaults: 1 shard, 0 replicas)
+python3 "$SCRIPT_DIR/../../../../scripts/vespa_local/vespa_local.py" full-start
MARQO_DOCKER_IMAGE="$1"
shift
diff --git a/tests/api_tests/v1/scripts/start_vespa.py b/tests/api_tests/v1/scripts/start_vespa.py
deleted file mode 100644
index 41f226911..000000000
--- a/tests/api_tests/v1/scripts/start_vespa.py
+++ /dev/null
@@ -1,227 +0,0 @@
-"""This is a script that pull/start the vespa docker image and deploy a dummy application package.
-
-It can be used in Marqo local runs to start Vespa outside the Marqo docker container. This requires
-that the host machine has docker installed.
-
-We generate a schema.sd file and a services.xml file and put them in a zip file. We then deploy the zip file
-using the REST API. After that, we check if Vespa is up and running. If it is, we can start Marqo.
-
-All the files are created in a directory called vespa_dummy_application_package. This directory is removed and
-the zip file is removed after the application package is deployed.
-
-Note: Vespa CLI is not needed as we use the REST API to deploy the application package.
-"""
-
-import os
-import shutil
-import subprocess
-import textwrap
-import time
-import sys
-
-import requests
-
-VESPA_VERSION=os.getenv('VESPA_VERSION', '8.472.109')
-
-
-def start_vespa() -> None:
- os.system("docker rm -f vespa 2>/dev/null || true")
- os.system("docker run --detach "
- "--name vespa "
- "--hostname vespa-container "
- "--publish 8080:8080 --publish 19071:19071 --publish 2181:2181 "
- f"vespaengine/vespa:{VESPA_VERSION}")
-
-
-def get_services_xml_content() -> str:
- return textwrap.dedent(
- """
-
-
-
-
-
-
-
-
-
-
- 2
-
-
-
-
-
-
-
-
- """)
-
-
-def get_test_vespa_client_schema_content() -> str:
- return textwrap.dedent("""
- schema test_vespa_client {
- document test_vespa_client {
-
- field id type string {
- indexing: summary | attribute
- }
-
- field title type string {
- indexing: summary | attribute | index
- index: enable-bm25
- }
-
- field contents type string {
- indexing: summary | attribute | index
- index: enable-bm25
- }
-
- }
-
- fieldset default {
- fields: title, contents
- }
-
- rank-profile bm25 inherits default {
- first-phase {
- expression: bm25(title) + bm25(contents)
- }
- }
- }
- """)
-
-
-def generate_application_package() -> str:
- base_dir = "vespa_dummy_application_package"
- subdirs = ["schemas"]
- files = {
- "schemas": ["test_vespa_client.sd"],
- "": ["services.xml"]
- }
- # Content for the files
- content_for_services_xml = get_services_xml_content()
- content_for_test_vespa_client_sd = get_test_vespa_client_schema_content()
- # Create the directories and files, and write content
- os.makedirs(base_dir, exist_ok=True)
- for subdir in subdirs:
- os.makedirs(os.path.join(base_dir, subdir), exist_ok=True)
- for file in files[subdir]:
- file_path = os.path.join(base_dir, subdir, file)
- with open(file_path, 'w') as f:
- if file == "test_vespa_client.sd":
- f.write(content_for_test_vespa_client_sd)
- for file in files[""]:
- file_path = os.path.join(base_dir, file)
- with open(file_path, 'w') as f:
- if file == "services.xml":
- f.write(content_for_services_xml)
- os.chdir(base_dir)
- shutil.make_archive('../' + base_dir, 'zip', ".")
- os.chdir("..")
- zip_file_path = f"{base_dir}.zip"
-
- if os.path.isfile(zip_file_path):
- print(f"Zip file created successfully: {zip_file_path}")
- # Remove the base directory
- shutil.rmtree(base_dir)
- print(f"Directory {base_dir} removed.")
- return zip_file_path
- else:
- print("Failed to create the zip file.")
-
-
-def deploy_application_package(zip_file_path: str, max_retries: int = 5, backoff_factor: float = 0.5) -> None:
- # URL and headers
- url = "http://localhost:19071/application/v2/tenant/default/prepareandactivate"
- headers = {
- "Content-Type": "application/zip"
- }
-
- # Ensure the zip file exists
- if not os.path.isfile(zip_file_path):
- print("Zip file does not exist.")
- return
-
- print("Start deploying the application package...")
-
- # Attempt to send the request with retries
- for attempt in range(max_retries):
- try:
- with open(zip_file_path, 'rb') as zip_file:
- response = requests.post(url, headers=headers, data=zip_file)
- print(response.text)
- break # Success, exit the retry loop
- except requests.exceptions.RequestException as e:
- print(f"Attempt {attempt + 1} failed due to a request error: {e}")
- if attempt < max_retries - 1:
- # Calculate sleep time using exponential backoff
- sleep_time = backoff_factor * (2 ** attempt)
- print(f"Retrying in {sleep_time} seconds...")
- time.sleep(sleep_time)
- else:
- print("Max retries reached. Aborting.")
- return
-
- # Cleanup
- os.remove(zip_file_path)
- print("Zip file removed.")
-
-
-def is_vespa_up(waiting_time: int = 60) -> bool:
- for _ in range(waiting_time):
- document_url = "http://localhost:8080"
- try:
- document_request = requests.get(document_url)
- if document_request.status_code == 200:
- print(f"Vespa is up and running! You can start Marqo. Make sure you set the Vespa environment variable")
- return True
- except requests.exceptions.ConnectionError:
- pass
- time.sleep(1)
-
- print(f"Vespa is not up and running after {waiting_time}s")
-
-
-def wait_vespa_container_running(max_wait_time: int = 60):
- start_time = time.time()
- # Check if the container is running
- while True:
- if time.time() - start_time > max_wait_time:
- print("Maximum wait time exceeded. Vespa container may not be running.")
- break
-
- try:
- output = subprocess.check_output(["docker", "inspect", "--format", "{{.State.Status}}", "vespa"])
- if output.decode().strip() == "running":
- print("Vespa container is up and running.")
- break
- except subprocess.CalledProcessError:
- pass
-
- print("Waiting for Vespa container to start...")
- time.sleep(5)
-
-
-def main():
- try:
- # Start Vespa
- start_vespa()
- # Wait for the container is pulled and running
- wait_vespa_container_running()
- # Generate the application package
- zip_file_path = generate_application_package()
- # Deploy the application package
- time.sleep(10)
- deploy_application_package(zip_file_path)
- # Check if Vespa is up and running
- is_vespa_up()
- except Exception as e:
- print(f"An error occurred when staring vespa: {e}")
- sys.exit(1)
-
-
-if __name__ == "__main__":
- main()
-
-
diff --git a/tests/integ_tests/conftest.py b/tests/integ_tests/conftest.py
index 5a68cb57a..6731f03ad 100644
--- a/tests/integ_tests/conftest.py
+++ b/tests/integ_tests/conftest.py
@@ -3,17 +3,20 @@
def pytest_addoption(parser):
parser.addoption("--largemodel", action="store_true", default=False)
+ parser.addoption("--multinode", action="store_true", default=False, help="Run tests that have multiple Vespa nodes")
def pytest_configure(config):
config.addinivalue_line("markers", "largemodel: mark test as largemodels")
config.addinivalue_line("markers", "cpu_only: mark test as cpu_only")
config.addinivalue_line("markers", "unittest: mark test as unit test, it does not require vespa to run")
+    config.addinivalue_line("markers", "skip_for_multinode: mark test to be skipped when running against a multi-node Vespa setup (--multinode)")
def pytest_collection_modifyitems(config, items):
skip_largemodel = pytest.mark.skip(reason="need --largemodel option to run")
skip_cpu_only = pytest.mark.skip(reason="skip in --largemodel mode when cpu_only is present")
+ skip_multinode = pytest.mark.skip(reason="Skipped because --multinode was used")
if config.getoption("--largemodel"):
# --largemodel given in cli: only run tests that have largemodel marker
@@ -24,3 +27,8 @@ def pytest_collection_modifyitems(config, items):
for item in items:
if "largemodel" in item.keywords:
item.add_marker(skip_largemodel)
+
+ if config.getoption("--multinode"):
+ for item in items:
+ if "skip_for_multinode" in item.keywords:
+ item.add_marker(skip_multinode)
\ No newline at end of file
diff --git a/tests/integ_tests/tensor_search/backwards_compat/test_search_regression.py b/tests/integ_tests/tensor_search/backwards_compat/test_search_regression.py
index 248e45bb6..4d73b6fa2 100644
--- a/tests/integ_tests/tensor_search/backwards_compat/test_search_regression.py
+++ b/tests/integ_tests/tensor_search/backwards_compat/test_search_regression.py
@@ -1,6 +1,7 @@
import os
import uuid
from unittest import mock
+import pytest
import marqo.core.exceptions as core_exceptions
from marqo.core.models.marqo_index import *
@@ -96,6 +97,7 @@ def tearDown(self) -> None:
super().tearDown()
self.device_patcher.stop()
+ @pytest.mark.skip_for_multinode
def test_search_result_scores_match_2_9(self):
"""
Tests that both lexical and tensor search results and scores match those
diff --git a/tests/integ_tests/tensor_search/integ_tests/test_add_documents_semi_structured.py b/tests/integ_tests/tensor_search/integ_tests/test_add_documents_semi_structured.py
index b7ffc3d5c..ecc7fa243 100644
--- a/tests/integ_tests/tensor_search/integ_tests/test_add_documents_semi_structured.py
+++ b/tests/integ_tests/tensor_search/integ_tests/test_add_documents_semi_structured.py
@@ -729,6 +729,7 @@ def test_supported_large_integer_and_float_number(self):
print(res)
self.assertEqual(res['errors'], error)
+ @pytest.mark.skip_for_multinode
def test_duplicate_ids_behaviour(self):
"""Test the behaviour when there are duplicate ids in a single batch.
diff --git a/tests/integ_tests/tensor_search/integ_tests/test_hybrid_search.py b/tests/integ_tests/tensor_search/integ_tests/test_hybrid_search.py
index 103c6d15d..374e16e16 100644
--- a/tests/integ_tests/tensor_search/integ_tests/test_hybrid_search.py
+++ b/tests/integ_tests/tensor_search/integ_tests/test_hybrid_search.py
@@ -19,6 +19,7 @@
from marqo.tensor_search.models.api_models import CustomVectorQuery
from marqo.tensor_search.models.api_models import ScoreModifierLists
from marqo.tensor_search.models.search import SearchContext
+import pytest
class TestHybridSearch(MarqoTestCase):
@@ -1486,7 +1487,7 @@ def test_hybrid_search_global_score_modifiers_with_rerank_depth(self):
self.assertEqual(["tensor2", "tensor1", "both1"], [hit["_id"] for hit in modified_res["hits"]])
-
+ @pytest.mark.skip_for_multinode
def test_hybrid_search_lexical_tensor_with_lexical_score_modifiers_succeeds(self):
"""
Tests that if we do hybrid search with lexical retrieval and tensor ranking, we can use both lexical and tensor
@@ -1553,7 +1554,7 @@ def test_hybrid_search_lexical_tensor_with_lexical_score_modifiers_succeeds(self
self.assertEqual(hybrid_res["hits"][2]["_id"], "doc10") # (score*-10*3)
self.assertEqual(hybrid_res["hits"][2]["_score"], -30.0)
-
+ @pytest.mark.skip_for_multinode
def test_hybrid_search_same_retrieval_and_ranking_matches_original_method(self):
"""
Tests that hybrid search with:
@@ -1657,6 +1658,7 @@ def test_hybrid_search_with_filter(self):
self.assertEqual(len(hybrid_res["hits"]), 1)
self.assertEqual(hybrid_res["hits"][0]["_id"], "doc8")
+ @pytest.mark.skip_for_multinode
def test_hybrid_search_with_images(self):
"""
Tests that hybrid search is accurate with images, both in query and in documents.
diff --git a/tests/integ_tests/tensor_search/test_pagination.py b/tests/integ_tests/tensor_search/test_pagination.py
index 2763d8248..e01284324 100644
--- a/tests/integ_tests/tensor_search/test_pagination.py
+++ b/tests/integ_tests/tensor_search/test_pagination.py
@@ -4,6 +4,7 @@
import string
import unittest
from unittest import mock
+import pytest
import requests
@@ -97,6 +98,7 @@ def generate_unique_strings(self, num_strings):
return list(unique_strings)
+ @pytest.mark.skip_for_multinode
def test_pagination_single_field(self):
num_docs = 400
batch_size = 100
@@ -156,6 +158,7 @@ def test_pagination_single_field(self):
self.assertEqual(full_search_results["hits"][i]["_id"], paginated_search_results["hits"][i]["_id"])
self.assertEqual(full_search_results["hits"][i]["_score"], paginated_search_results["hits"][i]["_score"])
+ @pytest.mark.skip_for_multinode
def test_pagination_hybrid(self):
num_docs = 400
batch_size = 100
diff --git a/tests/integ_tests/tensor_search/test_searchable_attributes.py b/tests/integ_tests/tensor_search/test_searchable_attributes.py
index 5472dec27..bdca09707 100644
--- a/tests/integ_tests/tensor_search/test_searchable_attributes.py
+++ b/tests/integ_tests/tensor_search/test_searchable_attributes.py
@@ -6,6 +6,7 @@
from marqo.tensor_search import tensor_search
from marqo.core.models.add_docs_params import AddDocsParams
from integ_tests.marqo_test import MarqoTestCase
+import pytest
class TestSearchableAttributes(MarqoTestCase):
@@ -167,6 +168,7 @@ def test_searchable_attributes_None(self):
)
self.assertEqual(3, len(res["hits"]))
+ @pytest.mark.skip_for_multinode
def test_searchable_attributes_behaves_the_same_way_for_different_types_of_indexes(self):
for index_name in [self.structured_text_index, self.semi_structured_text_index]:
self._add_documents(index_name)
diff --git a/tests/unit_tests/marqo/scripts/__init__.py b/tests/unit_tests/marqo/scripts/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/unit_tests/marqo/scripts/expected/docker-compose_1_shard_1_replica.yml b/tests/unit_tests/marqo/scripts/expected/docker-compose_1_shard_1_replica.yml
new file mode 100644
index 000000000..773a34c3a
--- /dev/null
+++ b/tests/unit_tests/marqo/scripts/expected/docker-compose_1_shard_1_replica.yml
@@ -0,0 +1,149 @@
+services:
+ config-0:
+ image: vespaengine/vespa:8.431.32
+ container_name: config-0
+ hostname: config-0.vespanet
+ environment:
+ VESPA_CONFIGSERVERS: config-0.vespanet,config-1.vespanet,config-2.vespanet
+ VESPA_CONFIGSERVER_JVMARGS: -Xms32M -Xmx128M
+ VESPA_CONFIGPROXY_JVMARGS: -Xms32M -Xmx128M
+ networks:
+ - vespanet
+ ports:
+ - 19071:19071
+ - 19100:19100
+ - 19050:19050
+ - 2181:2181
+ - 20092:19092
+ - 19098:19098
+ command: configserver,services
+ healthcheck:
+ test: curl http://localhost:19071/state/v1/health
+ timeout: 10s
+ retries: 3
+ start_period: 40s
+ config-1:
+ image: vespaengine/vespa:8.431.32
+ container_name: config-1
+ hostname: config-1.vespanet
+ environment:
+ VESPA_CONFIGSERVERS: config-0.vespanet,config-1.vespanet,config-2.vespanet
+ VESPA_CONFIGSERVER_JVMARGS: -Xms32M -Xmx128M
+ VESPA_CONFIGPROXY_JVMARGS: -Xms32M -Xmx128M
+ networks:
+ - vespanet
+ ports:
+ - 19072:19071
+ - 19101:19100
+ - 19051:19050
+ - 2182:2181
+ - 20093:19092
+ command: configserver,services
+ healthcheck:
+ test: curl http://localhost:19071/state/v1/health
+ timeout: 10s
+ retries: 3
+ start_period: 40s
+ config-2:
+ image: vespaengine/vespa:8.431.32
+ container_name: config-2
+ hostname: config-2.vespanet
+ environment:
+ VESPA_CONFIGSERVERS: config-0.vespanet,config-1.vespanet,config-2.vespanet
+ VESPA_CONFIGSERVER_JVMARGS: -Xms32M -Xmx128M
+ VESPA_CONFIGPROXY_JVMARGS: -Xms32M -Xmx128M
+ networks:
+ - vespanet
+ ports:
+ - 19073:19071
+ - 19102:19100
+ - 19052:19050
+ - 2183:2181
+ - 20094:19092
+ command: configserver,services
+ healthcheck:
+ test: curl http://localhost:19071/state/v1/health
+ timeout: 10s
+ retries: 3
+ start_period: 40s
+ api-0:
+ image: vespaengine/vespa:8.431.32
+ container_name: api-0
+ hostname: api-0.vespanet
+ environment:
+ - VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet
+ networks:
+ - vespanet
+ ports:
+ - 8080:8080
+ - 5005:5005
+ - 20095:19092
+ command: services
+ depends_on:
+ config-0:
+ condition: service_healthy
+ config-1:
+ condition: service_healthy
+ config-2:
+ condition: service_healthy
+ api-1:
+ image: vespaengine/vespa:8.431.32
+ container_name: api-1
+ hostname: api-1.vespanet
+ environment:
+ - VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet
+ networks:
+ - vespanet
+ ports:
+ - 8081:8080
+ - 5006:5005
+ - 20096:19092
+ command: services
+ depends_on:
+ config-0:
+ condition: service_healthy
+ config-1:
+ condition: service_healthy
+ config-2:
+ condition: service_healthy
+ content-0-0:
+ image: vespaengine/vespa:8.431.32
+ container_name: content-0-0
+ hostname: content-0-0.vespanet
+ environment:
+ - VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet
+ networks:
+ - vespanet
+ ports:
+ - 19107:19107
+ - 20097:19092
+ command: services
+ depends_on:
+ config-0:
+ condition: service_healthy
+ config-1:
+ condition: service_healthy
+ config-2:
+ condition: service_healthy
+ content-1-0:
+ image: vespaengine/vespa:8.431.32
+ container_name: content-1-0
+ hostname: content-1-0.vespanet
+ environment:
+ - VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet
+ networks:
+ - vespanet
+ ports:
+ - 19108:19107
+ - 20098:19092
+ command: services
+ depends_on:
+ config-0:
+ condition: service_healthy
+ config-1:
+ condition: service_healthy
+ config-2:
+ condition: service_healthy
+networks:
+ vespanet:
+ driver: bridge
diff --git a/tests/unit_tests/marqo/scripts/expected/docker-compose_2_shard_0_replica.yml b/tests/unit_tests/marqo/scripts/expected/docker-compose_2_shard_0_replica.yml
new file mode 100644
index 000000000..70181cf51
--- /dev/null
+++ b/tests/unit_tests/marqo/scripts/expected/docker-compose_2_shard_0_replica.yml
@@ -0,0 +1,149 @@
+services:
+ config-0:
+ image: vespaengine/vespa:8.431.32
+ container_name: config-0
+ hostname: config-0.vespanet
+ environment:
+ VESPA_CONFIGSERVERS: config-0.vespanet,config-1.vespanet,config-2.vespanet
+ VESPA_CONFIGSERVER_JVMARGS: -Xms32M -Xmx128M
+ VESPA_CONFIGPROXY_JVMARGS: -Xms32M -Xmx128M
+ networks:
+ - vespanet
+ ports:
+ - 19071:19071
+ - 19100:19100
+ - 19050:19050
+ - 2181:2181
+ - 20092:19092
+ - 19098:19098
+ command: configserver,services
+ healthcheck:
+ test: curl http://localhost:19071/state/v1/health
+ timeout: 10s
+ retries: 3
+ start_period: 40s
+ config-1:
+ image: vespaengine/vespa:8.431.32
+ container_name: config-1
+ hostname: config-1.vespanet
+ environment:
+ VESPA_CONFIGSERVERS: config-0.vespanet,config-1.vespanet,config-2.vespanet
+ VESPA_CONFIGSERVER_JVMARGS: -Xms32M -Xmx128M
+ VESPA_CONFIGPROXY_JVMARGS: -Xms32M -Xmx128M
+ networks:
+ - vespanet
+ ports:
+ - 19072:19071
+ - 19101:19100
+ - 19051:19050
+ - 2182:2181
+ - 20093:19092
+ command: configserver,services
+ healthcheck:
+ test: curl http://localhost:19071/state/v1/health
+ timeout: 10s
+ retries: 3
+ start_period: 40s
+ config-2:
+ image: vespaengine/vespa:8.431.32
+ container_name: config-2
+ hostname: config-2.vespanet
+ environment:
+ VESPA_CONFIGSERVERS: config-0.vespanet,config-1.vespanet,config-2.vespanet
+ VESPA_CONFIGSERVER_JVMARGS: -Xms32M -Xmx128M
+ VESPA_CONFIGPROXY_JVMARGS: -Xms32M -Xmx128M
+ networks:
+ - vespanet
+ ports:
+ - 19073:19071
+ - 19102:19100
+ - 19052:19050
+ - 2183:2181
+ - 20094:19092
+ command: configserver,services
+ healthcheck:
+ test: curl http://localhost:19071/state/v1/health
+ timeout: 10s
+ retries: 3
+ start_period: 40s
+ api-0:
+ image: vespaengine/vespa:8.431.32
+ container_name: api-0
+ hostname: api-0.vespanet
+ environment:
+ - VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet
+ networks:
+ - vespanet
+ ports:
+ - 8080:8080
+ - 5005:5005
+ - 20095:19092
+ command: services
+ depends_on:
+ config-0:
+ condition: service_healthy
+ config-1:
+ condition: service_healthy
+ config-2:
+ condition: service_healthy
+ api-1:
+ image: vespaengine/vespa:8.431.32
+ container_name: api-1
+ hostname: api-1.vespanet
+ environment:
+ - VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet
+ networks:
+ - vespanet
+ ports:
+ - 8081:8080
+ - 5006:5005
+ - 20096:19092
+ command: services
+ depends_on:
+ config-0:
+ condition: service_healthy
+ config-1:
+ condition: service_healthy
+ config-2:
+ condition: service_healthy
+ content-0-0:
+ image: vespaengine/vespa:8.431.32
+ container_name: content-0-0
+ hostname: content-0-0.vespanet
+ environment:
+ - VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet
+ networks:
+ - vespanet
+ ports:
+ - 19107:19107
+ - 20097:19092
+ command: services
+ depends_on:
+ config-0:
+ condition: service_healthy
+ config-1:
+ condition: service_healthy
+ config-2:
+ condition: service_healthy
+ content-0-1:
+ image: vespaengine/vespa:8.431.32
+ container_name: content-0-1
+ hostname: content-0-1.vespanet
+ environment:
+ - VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet
+ networks:
+ - vespanet
+ ports:
+ - 19108:19107
+ - 20098:19092
+ command: services
+ depends_on:
+ config-0:
+ condition: service_healthy
+ config-1:
+ condition: service_healthy
+ config-2:
+ condition: service_healthy
+networks:
+ vespanet:
+ driver: bridge
diff --git a/tests/unit_tests/marqo/scripts/expected/docker-compose_2_shard_1_replica.yml b/tests/unit_tests/marqo/scripts/expected/docker-compose_2_shard_1_replica.yml
new file mode 100644
index 000000000..e07f18eb0
--- /dev/null
+++ b/tests/unit_tests/marqo/scripts/expected/docker-compose_2_shard_1_replica.yml
@@ -0,0 +1,187 @@
+services:
+ config-0:
+ image: vespaengine/vespa:8.431.32
+ container_name: config-0
+ hostname: config-0.vespanet
+ environment:
+ VESPA_CONFIGSERVERS: config-0.vespanet,config-1.vespanet,config-2.vespanet
+ VESPA_CONFIGSERVER_JVMARGS: -Xms32M -Xmx128M
+ VESPA_CONFIGPROXY_JVMARGS: -Xms32M -Xmx128M
+ networks:
+ - vespanet
+ ports:
+ - 19071:19071
+ - 19100:19100
+ - 19050:19050
+ - 2181:2181
+ - 20092:19092
+ - 19098:19098
+ command: configserver,services
+ healthcheck:
+ test: curl http://localhost:19071/state/v1/health
+ timeout: 10s
+ retries: 3
+ start_period: 40s
+ config-1:
+ image: vespaengine/vespa:8.431.32
+ container_name: config-1
+ hostname: config-1.vespanet
+ environment:
+ VESPA_CONFIGSERVERS: config-0.vespanet,config-1.vespanet,config-2.vespanet
+ VESPA_CONFIGSERVER_JVMARGS: -Xms32M -Xmx128M
+ VESPA_CONFIGPROXY_JVMARGS: -Xms32M -Xmx128M
+ networks:
+ - vespanet
+ ports:
+ - 19072:19071
+ - 19101:19100
+ - 19051:19050
+ - 2182:2181
+ - 20093:19092
+ command: configserver,services
+ healthcheck:
+ test: curl http://localhost:19071/state/v1/health
+ timeout: 10s
+ retries: 3
+ start_period: 40s
+ config-2:
+ image: vespaengine/vespa:8.431.32
+ container_name: config-2
+ hostname: config-2.vespanet
+ environment:
+ VESPA_CONFIGSERVERS: config-0.vespanet,config-1.vespanet,config-2.vespanet
+ VESPA_CONFIGSERVER_JVMARGS: -Xms32M -Xmx128M
+ VESPA_CONFIGPROXY_JVMARGS: -Xms32M -Xmx128M
+ networks:
+ - vespanet
+ ports:
+ - 19073:19071
+ - 19102:19100
+ - 19052:19050
+ - 2183:2181
+ - 20094:19092
+ command: configserver,services
+ healthcheck:
+ test: curl http://localhost:19071/state/v1/health
+ timeout: 10s
+ retries: 3
+ start_period: 40s
+ api-0:
+ image: vespaengine/vespa:8.431.32
+ container_name: api-0
+ hostname: api-0.vespanet
+ environment:
+ - VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet
+ networks:
+ - vespanet
+ ports:
+ - 8080:8080
+ - 5005:5005
+ - 20095:19092
+ command: services
+ depends_on:
+ config-0:
+ condition: service_healthy
+ config-1:
+ condition: service_healthy
+ config-2:
+ condition: service_healthy
+ api-1:
+ image: vespaengine/vespa:8.431.32
+ container_name: api-1
+ hostname: api-1.vespanet
+ environment:
+ - VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet
+ networks:
+ - vespanet
+ ports:
+ - 8081:8080
+ - 5006:5005
+ - 20096:19092
+ command: services
+ depends_on:
+ config-0:
+ condition: service_healthy
+ config-1:
+ condition: service_healthy
+ config-2:
+ condition: service_healthy
+ content-0-0:
+ image: vespaengine/vespa:8.431.32
+ container_name: content-0-0
+ hostname: content-0-0.vespanet
+ environment:
+ - VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet
+ networks:
+ - vespanet
+ ports:
+ - 19107:19107
+ - 20097:19092
+ command: services
+ depends_on:
+ config-0:
+ condition: service_healthy
+ config-1:
+ condition: service_healthy
+ config-2:
+ condition: service_healthy
+ content-0-1:
+ image: vespaengine/vespa:8.431.32
+ container_name: content-0-1
+ hostname: content-0-1.vespanet
+ environment:
+ - VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet
+ networks:
+ - vespanet
+ ports:
+ - 19108:19107
+ - 20098:19092
+ command: services
+ depends_on:
+ config-0:
+ condition: service_healthy
+ config-1:
+ condition: service_healthy
+ config-2:
+ condition: service_healthy
+ content-1-0:
+ image: vespaengine/vespa:8.431.32
+ container_name: content-1-0
+ hostname: content-1-0.vespanet
+ environment:
+ - VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet
+ networks:
+ - vespanet
+ ports:
+ - 19109:19107
+ - 20099:19092
+ command: services
+ depends_on:
+ config-0:
+ condition: service_healthy
+ config-1:
+ condition: service_healthy
+ config-2:
+ condition: service_healthy
+ content-1-1:
+ image: vespaengine/vespa:8.431.32
+ container_name: content-1-1
+ hostname: content-1-1.vespanet
+ environment:
+ - VESPA_CONFIGSERVERS=config-0.vespanet,config-1.vespanet,config-2.vespanet
+ networks:
+ - vespanet
+ ports:
+ - 19110:19107
+ - 20100:19092
+ command: services
+ depends_on:
+ config-0:
+ condition: service_healthy
+ config-1:
+ condition: service_healthy
+ config-2:
+ condition: service_healthy
+networks:
+ vespanet:
+ driver: bridge
diff --git a/tests/unit_tests/marqo/scripts/expected/hosts_1_shard_1_replica.xml b/tests/unit_tests/marqo/scripts/expected/hosts_1_shard_1_replica.xml
new file mode 100644
index 000000000..134ced07d
--- /dev/null
+++ b/tests/unit_tests/marqo/scripts/expected/hosts_1_shard_1_replica.xml
@@ -0,0 +1,24 @@
+
+
+
+ config-0
+
+
+ config-1
+
+
+ config-2
+
+
+ api-0
+
+
+ api-1
+
+
+ content-0-0
+
+
+ content-1-0
+
+
diff --git a/tests/unit_tests/marqo/scripts/expected/hosts_2_shard_0_replica.xml b/tests/unit_tests/marqo/scripts/expected/hosts_2_shard_0_replica.xml
new file mode 100644
index 000000000..9ab4fdb49
--- /dev/null
+++ b/tests/unit_tests/marqo/scripts/expected/hosts_2_shard_0_replica.xml
@@ -0,0 +1,24 @@
+
+
+
+ config-0
+
+
+ config-1
+
+
+ config-2
+
+
+ api-0
+
+
+ api-1
+
+
+ content-0-0
+
+
+ content-0-1
+
+
diff --git a/tests/unit_tests/marqo/scripts/expected/hosts_2_shard_1_replica.xml b/tests/unit_tests/marqo/scripts/expected/hosts_2_shard_1_replica.xml
new file mode 100644
index 000000000..033d2edfc
--- /dev/null
+++ b/tests/unit_tests/marqo/scripts/expected/hosts_2_shard_1_replica.xml
@@ -0,0 +1,30 @@
+
+
+
+ config-0
+
+
+ config-1
+
+
+ config-2
+
+
+ api-0
+
+
+ api-1
+
+
+ content-0-0
+
+
+ content-0-1
+
+
+ content-1-0
+
+
+ content-1-1
+
+
diff --git a/scripts/vespa_local/services.xml b/tests/unit_tests/marqo/scripts/expected/services_1_shard_0_replica.xml
similarity index 92%
rename from scripts/vespa_local/services.xml
rename to tests/unit_tests/marqo/scripts/expected/services_1_shard_0_replica.xml
index 2c1df1e87..0a7597a04 100644
--- a/scripts/vespa_local/services.xml
+++ b/tests/unit_tests/marqo/scripts/expected/services_1_shard_0_replica.xml
@@ -1,5 +1,6 @@
+
diff --git a/tests/unit_tests/marqo/scripts/expected/services_1_shard_1_replica.xml b/tests/unit_tests/marqo/scripts/expected/services_1_shard_1_replica.xml
new file mode 100644
index 000000000..acfe2486c
--- /dev/null
+++ b/tests/unit_tests/marqo/scripts/expected/services_1_shard_1_replica.xml
@@ -0,0 +1,45 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/unit_tests/marqo/scripts/expected/services_2_shard_0_replica.xml b/tests/unit_tests/marqo/scripts/expected/services_2_shard_0_replica.xml
new file mode 100644
index 000000000..870a8b869
--- /dev/null
+++ b/tests/unit_tests/marqo/scripts/expected/services_2_shard_0_replica.xml
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/unit_tests/marqo/scripts/expected/services_2_shard_1_replica.xml b/tests/unit_tests/marqo/scripts/expected/services_2_shard_1_replica.xml
new file mode 100644
index 000000000..16a93b685
--- /dev/null
+++ b/tests/unit_tests/marqo/scripts/expected/services_2_shard_1_replica.xml
@@ -0,0 +1,47 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/unit_tests/marqo/scripts/test_vespa_local.py b/tests/unit_tests/marqo/scripts/test_vespa_local.py
new file mode 100644
index 000000000..641981d14
--- /dev/null
+++ b/tests/unit_tests/marqo/scripts/test_vespa_local.py
@@ -0,0 +1,166 @@
+import io
+import math
+import os
+import tempfile
+import unittest
+import yaml
+import docker
+from xml.etree import ElementTree as ET
+from xml.dom import minidom
+from unittest.mock import patch, mock_open, call
+from unit_tests.marqo_test import MarqoTestCase
+from scripts.vespa_local.vespa_local import VespaLocalSingleNode, VespaLocalMultiNode
+import builtins
+
+
+class TestVespaLocal(MarqoTestCase):
+ def setUp(self):
+ # Create a temporary directory and switch to it
+ self.test_dir = tempfile.TemporaryDirectory()
+ self.old_cwd = os.getcwd()
+ os.chdir(self.test_dir.name)
+
+ # Create a dedicated mock for write operations.
+ self.write_mock = mock_open()
+ self.real_open = builtins.open # Save the unpatched builtins.open
+
+ self.test_cases = [
+ (1, 1),
+ (2, 0),
+ (2, 1)
+ ]
+
+ def tearDown(self):
+ os.chdir(self.old_cwd)
+ self.test_dir.cleanup()
+
+ def custom_open(self, path: str, mode: str, *args, **kwargs):
+ """
+ If mode is for reading, use the real open,
+ otherwise use a mock open.
+ """
+ if 'r' in mode and 'w' not in mode:
+ return self.real_open(path, mode, *args, **kwargs)
+ else:
+ # For write mode, we'll use our global mock_open provided from the patch.
+ return self.write_mock(path, mode, *args, **kwargs)
+
+ def _read_file(self, path: str) -> str:
+ currentdir = os.path.dirname(os.path.abspath(__file__))
+ abspath = os.path.join(currentdir, path)
+
+ with open(abspath, 'r') as f:
+ file_content = f.read()
+
+ return file_content
+
+
+class TestVespaLocalMultiNode(TestVespaLocal):
+ @patch("builtins.open", side_effect=lambda path, mode, *args, **kwargs: None)
+ def test_generate_docker_compose(self, patched_open):
+ VESPA_VERSION = "8.431.32"
+ # Patch file write (to check content) but use original open for reading.
+ patched_open.side_effect = self.custom_open
+ for number_of_shards, number_of_replicas in self.test_cases:
+ with self.subTest(number_of_shards=number_of_shards, number_of_replicas=number_of_replicas):
+ test_vespa_local = VespaLocalMultiNode(number_of_shards, number_of_replicas)
+ test_vespa_local.generate_docker_compose(VESPA_VERSION)
+
+ # Verify that docker-compose.yml is written
+ patched_open.assert_any_call('docker-compose.yml', 'w')
+
+ handle = self.write_mock()
+ written_yml = "".join([call_arg[0][0] for call_arg in handle.write.call_args_list])
+
+ # Check that written YML exactly matches the expected YML
+ expected_file_name = f"expected/docker-compose_{number_of_shards}_shard_{number_of_replicas}_replica.yml"
+ expected_yml = self._read_file(expected_file_name)
+ self.assertEqual(written_yml, expected_yml)
+
+ # Reset call_args_list to avoid tests failing due to previous calls
+ self.write_mock.reset_mock()
+
+ def test_generate_services_xml(self):
+ for number_of_shards, number_of_replicas in self.test_cases:
+ with (self.subTest(number_of_shards=number_of_shards, number_of_replicas=number_of_replicas)):
+ test_vespa_local = VespaLocalMultiNode(number_of_shards, number_of_replicas)
+ actual_services_xml_content = test_vespa_local.get_services_xml_content()
+
+ # Check that written XML exactly matches the expected XML
+ expected_file_name = f"expected/services_{number_of_shards}_shard_{number_of_replicas}_replica.xml"
+ expected_xml = self._read_file(expected_file_name)
+ self.assertEqual(actual_services_xml_content, expected_xml)
+
+ def test_generate_hosts_xml(self):
+ for number_of_shards, number_of_replicas in self.test_cases:
+ with (self.subTest(number_of_shards=number_of_shards, number_of_replicas=number_of_replicas)):
+ test_vespa_local = VespaLocalMultiNode(number_of_shards, number_of_replicas)
+ actual_hosts_xml_content = test_vespa_local.get_hosts_xml_content()
+
+ # Check that written XML exactly matches the expected XML
+ expected_file_name = f"expected/hosts_{number_of_shards}_shard_{number_of_replicas}_replica.xml"
+ expected_xml = self._read_file(expected_file_name)
+ self.assertEqual(actual_hosts_xml_content, expected_xml)
+ @patch("os.system")
+ @patch("builtins.open")
+ def test_start(self, mock_open, mock_system):
+ for number_of_shards, number_of_replicas in self.test_cases:
+ with self.subTest(number_of_shards=number_of_shards, number_of_replicas=number_of_replicas):
+ test_vespa_local = VespaLocalMultiNode(number_of_shards, number_of_replicas)
+ test_vespa_local.start()
+
+ # Check that os.system was called to copy and bring up docker compose.
+ expected_calls = [
+ call("docker compose down 2>/dev/null || true"),
+ call("docker compose up -d"),
+ ]
+ mock_system.assert_has_calls(expected_calls, any_order=True)
+
+ def test_generate_application_package_files_multi_node(self):
+ for number_of_shards, number_of_replicas in self.test_cases:
+ with self.subTest(number_of_shards=number_of_shards, number_of_replicas=number_of_replicas):
+ test_vespa_local = VespaLocalMultiNode(number_of_shards, number_of_replicas)
+ test_vespa_local.generate_application_package_files()
+ self.assertTrue(os.path.isdir("vespa_dummy_application_package"))
+ schema_file = os.path.join("vespa_dummy_application_package", "schemas", "test_vespa_client.sd")
+ self.assertTrue(os.path.isfile(schema_file))
+ with open(schema_file, "r") as f:
+ content = f.read()
+ self.assertIn("schema test_vespa_client", content)
+ services_file = os.path.join("vespa_dummy_application_package", "services.xml")
+ hosts_file = os.path.join("vespa_dummy_application_package", "hosts.xml")
+ self.assertTrue(os.path.isfile(services_file))
+ self.assertTrue(os.path.isfile(hosts_file))
+
+
+class TestVespaLocalSingleNode(TestVespaLocal):
+ def setUp(self):
+ super().setUp()
+ self.test_vespa_local = VespaLocalSingleNode()
+
+ @patch("os.system")
+ @patch("builtins.open")
+ def test_start(self, mock_open, mock_system):
+ self.test_vespa_local.start()
+
+ # Check that os.system was called to copy and bring up docker compose.
+ expected_calls = [
+ call("docker rm -f vespa 2>/dev/null || true"),
+ call("docker run --detach "
+ "--name vespa "
+ "--hostname vespa-container "
+ "--publish 8080:8080 --publish 19071:19071 --publish 2181:2181 --publish 127.0.0.1:5005:5005 "
+ f"vespaengine/vespa:8.472.109"),
+ ]
+ mock_system.assert_has_calls(expected_calls, any_order=True)
+
+ def test_generate_application_package_files_single_node(self):
+ self.test_vespa_local.generate_application_package_files()
+ self.assertTrue(os.path.isdir("vespa_dummy_application_package"))
+ schema_file = os.path.join("vespa_dummy_application_package", "schemas", "test_vespa_client.sd")
+ self.assertTrue(os.path.isfile(schema_file))
+ with open(schema_file, "r") as f:
+ content = f.read()
+ self.assertIn("schema test_vespa_client", content)
+ services_file = os.path.join("vespa_dummy_application_package", "services.xml")
+ self.assertTrue(os.path.isfile(services_file))
\ No newline at end of file