From 397e880ee467f7badd8ebc467fcef8b24177da5e Mon Sep 17 00:00:00 2001
From: Manuel Candales <mcandales@fb.com>
Date: Wed, 12 Feb 2025 17:00:23 -0500
Subject: [PATCH] Add performance benchmark config: MPS 8da4w

---
 .ci/scripts/gather_benchmark_configs.py |  1 +
 .github/workflows/apple-perf.yml        | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/.ci/scripts/gather_benchmark_configs.py b/.ci/scripts/gather_benchmark_configs.py
index 0fe60a0d77..c15a64e3ed 100755
--- a/.ci/scripts/gather_benchmark_configs.py
+++ b/.ci/scripts/gather_benchmark_configs.py
@@ -43,6 +43,7 @@
         "coreml_fp16",
         "mps",
         "llama3_coreml_ane",
+        "llama3_mps_8da4w",
     ],
 }
 
diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml
index ea88be441c..83f03323e9 100644
--- a/.github/workflows/apple-perf.yml
+++ b/.github/workflows/apple-perf.yml
@@ -298,6 +298,23 @@ jobs:
                 --coreml-compute-units cpu_and_ne \
                 --output_name="${OUT_ET_MODEL_NAME}.pte"
               ls -lh "${OUT_ET_MODEL_NAME}.pte"
+            elif [[ ${{ matrix.config }} == "llama3_mps_8da4w" ]]; then
+              # MPS backend with 8da4w quantization (8-bit dynamic activations, 4-bit weights)
+              DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
+              ${CONDA_RUN} python -m examples.models.llama.export_llama \
+                --model "llama3_2" \
+                --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
+                --params "${DOWNLOADED_PATH}/params.json" \
+                -kv \
+                --use_sdpa_with_kv_cache \
+                --disable_dynamic_shape \
+                --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \
+                --mps \
+                -qmode 8da4w \
+                --group_size 32 \
+                --embedding-quantize 4,32 \
+                --output_name="${OUT_ET_MODEL_NAME}.pte"
+              ls -lh "${OUT_ET_MODEL_NAME}.pte"
             else
               # By default, test with the Hugging Face model and the xnnpack recipe
               DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model")