diff --git a/.github/workflows/instant_benchmark.yml b/.github/workflows/instant_benchmark.yml
index 82f1b8d3e7..0b8a48598a 100644
--- a/.github/workflows/instant_benchmark.yml
+++ b/.github/workflows/instant_benchmark.yml
@@ -15,9 +15,14 @@ on:
           - g5.2xlarge
           - g5.12xlarge
           - g5.48xlarge
+          - g6.2xlarge
+          - g6.12xlarge
+          - g6.48xlarge
           - g4dn.12xlarge
           - g4dn.2xlarge
           - p4d.24xlarge
+          - p4de.24xlarge
+          - p5.24xlarge
           - inf2.8xlarge
           - inf2.24xlarge
           - trn1.2xlarge
@@ -127,26 +132,20 @@ jobs:
         run: |
           wget https://publish.djl.ai/awscurl/awscurl
           chmod +x awscurl
-      - name: Run benchmark job
-        working-directory: tests/integration
-        run: |
-          echo "${{ needs.environment-setup.outputs.template }}" >> template.json
-          python3 instant_benchmark.py --template template.json \
-          --job ${{ matrix.job }} --instance ${{ inputs.instance }}
-
-          bash instant_benchmark.sh
       - name: Configure AWS Credentials
         uses: aws-actions/configure-aws-credentials@v4
         with:
          role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
          aws-region: us-east-1
-      - name: Record benchmark job
-        if: ${{ inputs.record == 'table' || inputs.record == 'cloudwatch' }}
+      - name: Run benchmark job
         working-directory: tests/integration
         run: |
-          python3 record_benchmark.py --template template.json \
+          echo "${{ needs.environment-setup.outputs.template }}" >> template.json
+          python3 instant_benchmark.py --template template.json \
           --job ${{ matrix.job }} --instance ${{ inputs.instance }} \
-          --model models/test --record ${{ inputs.record }}
+          --record ${{ inputs.record }}
+
+          bash instant_benchmark.sh
       - name: Get serving logs
         if: always()
         working-directory: tests/integration
diff --git a/tests/integration/benchmark/nightly/g5-12xl.txt b/tests/integration/benchmark/nightly/g5-12xl.txt
index 1244b5adea..cd73232d2b 100644
--- a/tests/integration/benchmark/nightly/g5-12xl.txt
+++ b/tests/integration/benchmark/nightly/g5-12xl.txt
@@ -13,6 +13,7 @@ option.tensor_parallel_degree=max
 TOKENIZER=TheBloke/Llama-2-7B-fp16 ./awscurl -c 32 -N 10 \
 -X POST http://127.0.0.1:8080/invocations \
 --connect-timeout 60 -H "Content-type: application/json" \
+--json-path benchmark_result.json \
 -d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
 -t -o /tmp/output.txt
 [test_name]
@@ -22,13 +23,13 @@ ENGINE={vllm,lmi-dist}
 [container]
 deepjavalibrary/djl-serving:lmi-nightly
 [serving_properties]
-engine=Python
 option.rolling_batch=$ENGINE
 option.model_id=s3://djl-llm/llama-3-8b-hf/
 option.tensor_parallel_degree=max
 [aws_curl]
 TOKENIZER=TheBloke/Llama-2-13B-fp16 ./awscurl -c 32 -N 10 \
 -X POST http://127.0.0.1:8080/invocations \
+--json-path benchmark_result.json \
 --connect-timeout 60 -H "Content-type: application/json" \
 -d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
 -t -o /tmp/output.txt
diff --git a/tests/integration/benchmark/nightly/g5-2xl.txt b/tests/integration/benchmark/nightly/g5-2xl.txt
index 097361342c..9ba10af681 100644
--- a/tests/integration/benchmark/nightly/g5-2xl.txt
+++ b/tests/integration/benchmark/nightly/g5-2xl.txt
@@ -2,17 +2,19 @@
 mistral
 [vars]
 ENGINE={vllm,lmi-dist}
+[benchmark_vars]
+CONCURRENCY={1,2,4,8}
 [container]
 deepjavalibrary/djl-serving:lmi-nightly
 [serving_properties]
-engine=Python
 option.rolling_batch=$ENGINE
 option.model_id=NousResearch/Hermes-2-Pro-Mistral-7B
 option.tensor_parallel_degree=max
 option.max_model_len=8192
 [aws_curl]
-TOKENIZER=TheBloke/Mistral-7B-Instruct-v0.2-AWQ ./awscurl -c 32 -N 10 \
+TOKENIZER=TheBloke/Mistral-7B-Instruct-v0.2-AWQ ./awscurl -c 32 -N $CONCURRENCY \
 -X POST http://127.0.0.1:8080/invocations \
 --connect-timeout 60 -H "Content-type: application/json" \
+--json-path benchmark_result.json \
 -d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
 -t -o /tmp/output.txt
diff --git a/tests/integration/benchmark/nightly/g5-48xl.txt b/tests/integration/benchmark/nightly/g5-48xl.txt
index bfdda8953e..e2749edabb 100644
--- a/tests/integration/benchmark/nightly/g5-48xl.txt
+++ b/tests/integration/benchmark/nightly/g5-48xl.txt
@@ -5,13 +5,13 @@ ENGINE={vllm,lmi-dist}
 [container]
 deepjavalibrary/djl-serving:lmi-nightly
 [serving_properties]
-engine=Python
 option.rolling_batch=$ENGINE
 option.model_id=s3://djl-llm/mixtral-8x7b
 option.tensor_parallel_degree=max
 [aws_curl]
 TOKENIZER=NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO ./awscurl -c 32 -N 10 \
 -X POST http://127.0.0.1:8080/invocations \
+--json-path benchmark_result.json \
 --connect-timeout 60 -H "Content-type: application/json" \
 -d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
 -t -o /tmp/output.txt
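The new `[benchmark_vars]` section works like `[vars]`, except that it multiplies the `[aws_curl]` command rather than the deployment options, so `CONCURRENCY={1,2,4,8}` produces four client invocations that are run and recorded separately. A minimal sketch of that fan-out, assuming simple `{a,b,c}` value lists and `$NAME` substitution; the `expand_benchmark_vars` helper below is illustrative only and is not the parser's actual `multiply_template_with_vars`:

```python
# Illustrative sketch: fan one [aws_curl] command out over [benchmark_vars] values.
import itertools
import re


def expand_benchmark_vars(command: str, benchmark_vars: list[str]) -> dict[str, str]:
    """Return {"VAR=value,...": command with $VAR substituted} for every combination."""
    names, value_lists = [], []
    for raw in benchmark_vars:
        name, values = raw.split("=", 1)
        names.append(name)
        # "{1,2,4,8}" -> ["1", "2", "4", "8"]; a bare value stays a single-item list
        value_lists.append(re.findall(r"[^{},]+", values))
    expanded = {}
    for combo in itertools.product(*value_lists):
        key = ",".join(f"{n}={v}" for n, v in zip(names, combo))
        cmd = command
        for n, v in zip(names, combo):
            cmd = cmd.replace(f"${n}", v)
        expanded[key] = cmd
    return expanded


variants = expand_benchmark_vars("./awscurl -c 32 -N $CONCURRENCY ...",
                                 ["CONCURRENCY={1,2,4,8}"])
for key, cmd in variants.items():
    print(key, "->", cmd)
```

The resulting keys, for example `CONCURRENCY=4`, are the strings later passed to `record_benchmark.py --benchmark-vars`, so each variant is published under its own metric name.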
diff --git a/tests/integration/instant_benchmark.py b/tests/integration/instant_benchmark.py
index fc10fa5579..3d0f3ed0dc 100644
--- a/tests/integration/instant_benchmark.py
+++ b/tests/integration/instant_benchmark.py
@@ -29,6 +29,10 @@
                     required=False,
                     type=str,
                     help="The current instance name")
+parser.add_argument("--record",
+                    required=False,
+                    type=str,
+                    help="Place to record metrics")
 parser.add_argument("--job", required=False, type=str, help="The job string")
 
 args = parser.parse_args()
@@ -110,6 +114,7 @@ def parse_raw_template(url, override_container):
     commandline = []
     requirements = []
     vars = []
+    benchmark_vars = []
     info = None
     while iterator < len(lines):
         if '[test_name]' == lines[iterator]:
@@ -148,6 +153,12 @@ def parse_raw_template(url, override_container):
                     lines[iterator]):
                 vars.append(lines[iterator])
                 iterator += 1
+        elif '[benchmark_vars]' == lines[iterator]:
+            iterator += 1
+            while iterator < len(lines) and not is_square_bracket(
+                    lines[iterator]):
+                benchmark_vars.append(lines[iterator])
+                iterator += 1
         elif '[info]' == lines[iterator]:
             info = []
             iterator += 1
@@ -174,8 +185,14 @@
             if info is not None:
                 cur_result['info'] = info
             mul_results = multiply_template_with_vars(name, cur_result, vars)
+            # each of the replicated deployment options
             for r in mul_results.values():
-                r['awscurl'] = r['awscurl'].encode().hex()
+                replicated_awscurl = multiply_template_with_vars(
+                    '', {'awscurl': cur_result['awscurl']}, benchmark_vars)
+                for option in replicated_awscurl.keys():
+                    replicated_awscurl[option] = replicated_awscurl[option][
+                        'awscurl'].encode().hex()
+                r['awscurl'] = replicated_awscurl
             final_result.update(mul_results)
             name = ''
             container = None
@@ -219,23 +236,33 @@ def machine_translation(machine_name: str):
     return "lmi"
 
 
-def build_running_script(template, job, instance):
+def build_running_script(template, job, instance, record):
     with open(template) as f:
         template = json.load(f)
     job_template = template[job]
-    job_template['awscurl'] = bytes.fromhex(
-        job_template['awscurl']).decode("utf-8")
+    for key in job_template['awscurl'].keys():
+        job_template['awscurl'][key] = bytes.fromhex(
+            job_template['awscurl'][key]).decode("utf-8")
     write_model_artifacts(job_template['properties'],
                           job_template['requirements'],
                           job_template['env'])
-
     container = job_template['container']
+    benchmark_command = []
+    record_benchmark = ('python3 record_benchmark.py --template template.json '
+                        f'--job {job} --instance {instance} '
+                        f'--model models/test --record {record}')
+
+    for key, value in job_template['awscurl'].items():
+        benchmark_command.append(value)
+        benchmark_command.append(record_benchmark + f' --benchmark-vars "{key}"')
+
     bash_command = [
-        'set -euo pipefail', 'echo "Start Launching container..."',
+        'set -euo pipefail',
+        'echo "Start Launching container..."',
         f"docker pull {container}",
         f"./launch_container.sh {container} $PWD/models {machine_translation(instance)}",
-        job_template['awscurl'] + " | tee benchmark.log"
     ]
+    bash_command.extend(benchmark_command)
     with open("instant_benchmark.sh", "w") as f:
         f.write('\n'.join(bash_command))
 
@@ -249,7 +276,8 @@
         command = f"echo \"template={json.dumps(json.dumps(json.dumps(result)))}\" >> $GITHUB_OUTPUT"
         sp.call(command, shell=True)
     elif args.template and args.job and args.instance:
-        build_running_script(args.template, args.job, args.instance)
+        build_running_script(args.template, args.job, args.instance,
+                             args.record)
     else:
         parser.print_help()
         raise ValueError("args not supported")
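Two details of `build_running_script` are worth keeping in mind when reading the generated `instant_benchmark.sh`: the per-variant awscurl commands travel through the template as hex strings (so quoting survives the `$GITHUB_OUTPUT` round trip), and each decoded command is now followed by its own `record_benchmark.py` invocation instead of a single `tee benchmark.log`. A small self-contained sketch of that round trip; the command string, job name, and `CONCURRENCY=1` key below are made-up sample data, not values from the repository:

```python
# Sketch of the hex round-trip and command/record interleaving; sample data only.
awscurl_by_vars = {
    "CONCURRENCY=1": "TOKENIZER=... ./awscurl -c 32 -N 1 ... -o /tmp/output.txt",
}

# parse_raw_template side: hide each shell command inside a hex string
encoded = {key: cmd.encode().hex() for key, cmd in awscurl_by_vars.items()}

# build_running_script side: decode and pair every command with a record call
record = ("python3 record_benchmark.py --template template.json "
          "--job sample_job --instance g5.2xlarge "
          "--model models/test --record cloudwatch")
script_lines = []
for key, hex_cmd in encoded.items():
    script_lines.append(bytes.fromhex(hex_cmd).decode("utf-8"))
    script_lines.append(record + f' --benchmark-vars "{key}"')
print("\n".join(script_lines))
```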
diff --git a/tests/integration/record_benchmark.py b/tests/integration/record_benchmark.py
index 218ecb6c47..df6cdcaeba 100755
--- a/tests/integration/record_benchmark.py
+++ b/tests/integration/record_benchmark.py
@@ -37,6 +37,11 @@
                     required=False,
                     type=str,
                     help="The path to the model input directory")
+parser.add_argument("--benchmark-vars",
+                    required=False,
+                    type=str,
+                    help="The benchmark variables used to differentiate in"
+                    " cloudwatch like [CONCURRENCY=2,DATASET=gsm8k]")
 parser.add_argument("--info",
                     required=False,
                     type=str,
@@ -46,55 +51,61 @@
 
 data = {}
 
+cloudwatch_report_schema = {
+    "totalTimeMills": 'Milliseconds',
+    "totalRequests": 'Count',
+    "failedRequests": 'Count',
+    "concurrentClients": 'Count',
+    "totalTokens": 'Count',
+    "tokenPerRequest": 'Count',
+    "averageLatency": 'Milliseconds',
+    "p50Latency": 'Milliseconds',
+    "p90Latency": 'Milliseconds',
+    "p99Latency": 'Milliseconds',
+    "timeToFirstByte": 'Milliseconds',
+    "p50TimeToFirstByte": 'Milliseconds',
+    "p90TimeToFirstByte": 'Milliseconds',
+    "p99TimeToFirstByte": 'Milliseconds',
+}
+
 
 class Benchmark:
 
-    def __init__(self, dyn_resource):
+    def __init__(self, dyn_resource, data: dict):
         self.dyn_resource = dyn_resource
         self.table = dyn_resource.Table("RubikonBenchmarks")
         self.table.load()
+        self.data = data
 
     def add_benchmark(self):
-        self.table.put_item(Item=data)
+        self.table.put_item(Item=self.data)
 
 
-def record_table():
+def record_table(data: dict):
     table = boto3.resource("dynamodb").Table("RubikonBenchmarks")
     table.put_item(Item=data)
 
 
-def record_cloudwatch():
+def record_cloudwatch(data: dict):
     esc = lambda n: n.replace("/", "-").replace(".", "-").replace("=", "-"
                                                                   ).strip(' -')
     job_name = data["modelId"] if "job" not in data else data["job"]
-    metric_name = lambda n: f"lmi_{data['instance']}_{esc(data['image'])}_{esc(job_name)}_{n}"
-    metric_data = [
-        {
-            'MetricName': metric_name("throughput"),
-            'Unit': 'Count/Second',
-            'Value': data['throughput']
-        },
-        {
-            'MetricName': metric_name("latency_p50"),
-            'Unit': 'Milliseconds',
-            'Value': data['P50']
-        },
-        {
-            'MetricName': metric_name("latency_p90"),
-            'Unit': 'Milliseconds',
-            'Value': data['P90']
-        },
-        {
-            'MetricName': metric_name("latency_p99"),
-            'Unit': 'Milliseconds',
-            'Value': data['P99']
-        },
-    ]
+    benchmark_vars = data["benchmark_vars"] if data["benchmark_vars"] else ""
+    metric_name = lambda n: (f"lmi_{data['instance']}_{esc(data['image'])}"
+                             f"_{esc(job_name)}_{esc(benchmark_vars)}_{n}")
+    metric_data = []
+    for metric, unit in cloudwatch_report_schema.items():
+        if metric in data.keys():
+            metric_data.append({
+                'MetricName': metric_name(metric),
+                'Unit': unit,
+                'Value': data[metric]
+            })
     cw = boto3.client('cloudwatch', region_name='us-east-1')
     cw.put_metric_data(Namespace="LMI_Benchmark", MetricData=metric_data)
 
 
-def data_basic():
+def data_basic(data: dict):
     data["modelServer"] = "DJLServing"
     data["service"] = "ec2"
     data["Timestamp"] = Decimal(time.time())
@@ -110,33 +121,39 @@ def data_basic():
             data[split[0]] = split[1]
 
 
-def data_from_client():
-    with open("benchmark.log", "r") as f:
-        for line in f.readlines():
-            line = line.strip()
-            if "Total time:" in line:
-                data["totalTime"] = Decimal(line.split(" ")[2])
-            if "error rate:" in line:
-                data["errorRate"] = Decimal(line.split(" ")[-1])
-            if "Concurrent clients:" in line:
-                data["concurrency"] = int(line.split(" ")[2])
-            if "Total requests:" in line:
-                data["requests"] = int(line.split(" ")[2])
-            if "TPS:" in line:
-                data["tps"] = Decimal(line.split(" ")[1].split("/")[0])
-            if "Average Latency:" in line:
-                data["avgLatency"] = Decimal(line.split(" ")[2])
-            if "P50:" in line:
-                data["P50"] = Decimal(line.split(" ")[1])
-            if "P90:" in line:
-                data["P90"] = Decimal(line.split(" ")[1])
-            if "P99:" in line:
-                data["P99"] = Decimal(line.split(" ")[1])
-    if "totalTime" in data and "requests" in data:
-        data["throughput"] = data["requests"] / data["totalTime"]
-
-
-def data_container():
+def data_from_client(data: dict):
+    if os.path.exists("benchmark_result.json"):
+        with open("benchmark_result.json", "r") as f:
+            data.update(json.load(f))
+    elif os.path.exists("benchmark.log"):
+        with open("benchmark.log", "r") as f:
+            for line in f.readlines():
+                line = line.strip()
+                if "Total time:" in line:
+                    data["totalTime"] = Decimal(line.split(" ")[2])
+                if "error rate:" in line:
+                    data["errorRate"] = Decimal(line.split(" ")[-1])
+                if "Concurrent clients:" in line:
+                    data["concurrency"] = int(line.split(" ")[2])
+                if "Total requests:" in line:
+                    data["requests"] = int(line.split(" ")[2])
+                if "TPS:" in line:
+                    data["tps"] = Decimal(line.split(" ")[1].split("/")[0])
+                if "Average Latency:" in line:
+                    data["avgLatency"] = Decimal(line.split(" ")[2])
+                if "P50:" in line:
+                    data["P50"] = Decimal(line.split(" ")[1])
+                if "P90:" in line:
+                    data["P90"] = Decimal(line.split(" ")[1])
+                if "P99:" in line:
+                    data["P99"] = Decimal(line.split(" ")[1])
+        if "totalTime" in data and "requests" in data:
+            data["throughput"] = data["requests"] / data["totalTime"]
+    else:
+        print("There is no benchmark logs found!")
+
+
+def data_container(data: dict):
     if "container" in data:
         container = data["container"]
         if container.startswith("deepjavalibrary/djl-serving:"):
@@ -158,7 +175,7 @@ def data_container():
             data["tgiVersion"] = version
 
 
-def data_from_model_files():
+def data_from_model_files(data: dict):
     if args.model:
         propsPath = os.path.join(args.model, "serving.properties")
         if os.path.isfile(propsPath):
@@ -211,14 +228,15 @@ def data_from_model_files():
                     data["modelId"] = envs["MODEL_ID"]
 
 
-def data_from_template():
+def data_from_template(data: dict):
     if args.template:
         with open(args.template, "r") as f:
             template = json.load(f)
         job_template = template[args.job]
         data["job"] = args.job
+        data['benchmark_vars'] = args.benchmark_vars
         data["awscurl"] = bytes.fromhex(
-            job_template['awscurl']).decode("utf-8")
+            job_template['awscurl'][args.benchmark_vars]).decode("utf-8")
         if "container" not in data and "container" in job_template:
             data["container"] = job_template["container"]
         if "info" in job_template:
@@ -231,19 +249,19 @@
 
 
 if __name__ == "__main__":
-    data_from_template()
-    data_basic()
-    data_container()
-    data_from_client()
-    data_from_model_files()
+    data_from_template(data)
+    data_basic(data)
+    data_container(data)
+    data_from_client(data)
+    data_from_model_files(data)
 
     if "errorRate" not in data or data["errorRate"] == 100:
         print("Not recording failed benchmark")
         print(data)
     else:
         if args.record == "table":
-            record_table()
+            record_table(data)
         elif args.record == "cloudwatch":
-            record_cloudwatch()
+            record_cloudwatch(data)
         else:
             print(data)
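With this change, the CloudWatch path publishes one metric per `cloudwatch_report_schema` key that is present in the collected data, and `data_from_client` prefers to load that data from awscurl's `benchmark_result.json` (produced by the new `--json-path` flag in the nightly templates), falling back to parsing `benchmark.log`. A sketch of that selection logic with invented sample values; the metric prefix is made up, and the `put_metric_data` call is left commented out so the snippet runs without AWS credentials:

```python
# Sketch of the schema-driven metric selection; sample values are invented.
import json

cloudwatch_report_schema = {
    "totalRequests": "Count",
    "p50Latency": "Milliseconds",
    "p90Latency": "Milliseconds",
}

# Stand-in for the contents of benchmark_result.json merged into `data`.
sample_result = {
    "totalRequests": 320,
    "p50Latency": 812.4,
    "p90Latency": 1103.9,
    "unrelatedField": "ignored",  # not in the schema, so never published
}

metric_prefix = "lmi_g5-2xlarge_lmi-nightly_mistral_CONCURRENCY-4"
metric_data = [{
    "MetricName": f"{metric_prefix}_{metric}",
    "Unit": unit,
    "Value": sample_result[metric],
} for metric, unit in cloudwatch_report_schema.items() if metric in sample_result]

print(json.dumps(metric_data, indent=2))
# boto3.client("cloudwatch").put_metric_data(Namespace="LMI_Benchmark",
#                                            MetricData=metric_data)
```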