From a8abc12425b58f4d1848b9f93515c919856256b6 Mon Sep 17 00:00:00 2001
From: deepak-gowda-narayana <deepak.narayana@intel.com>
Date: Mon, 28 Oct 2024 16:35:45 +0000
Subject: [PATCH 1/4] Fix inverted throughput calculation in GaudiDDPMPipeline

---
 optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py | 2 +-
 tests/test_diffusers.py                                  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py b/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py
index 7b3ea5afdb..f26f483813 100644
--- a/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py
+++ b/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py
@@ -180,5 +180,5 @@ def __call__(
         if not return_dict:
             return (image,)
 
-        throughput = (end_time - start_time) / batch_size
+        throughput = batch_size / (end_time - start_time)
         return GaudiDDPMPipelineOutput(images=image, throughput=throughput)
diff --git a/tests/test_diffusers.py b/tests/test_diffusers.py
index 35556c57bf..e4c61b7cda 100755
--- a/tests/test_diffusers.py
+++ b/tests/test_diffusers.py
@@ -136,7 +136,7 @@
     INPAINT_XL_THROUGHPUT_BASELINE_BF16 = 1.151
     TEXT_TO_VIDEO_SYNTHESIS_BF16_BASELINE = 70
     DETERMINISTIC_IMAGE_GENERATION_THROUGHPUT = 0.946
-    THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 7.671212047338486
+    THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 0.15186785472532677
     DEPTH2IMG_GENERATION_LATENCY_BASELINE_BF16 = 36.06376791000366
 else:
     THROUGHPUT_BASELINE_BF16 = 0.309
@@ -148,7 +148,7 @@
     INPAINT_THROUGHPUT_BASELINE_BF16 = 1.42
     INPAINT_XL_THROUGHPUT_BASELINE_BF16 = 0.271
     DETERMINISTIC_IMAGE_GENERATION_THROUGHPUT = 0.302
-    THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 3.095533166996529
+    THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 0.050208662346013566
     TEXT_TO_VIDEO_SYNTHESIS_BF16_BASELINE = 1000  # TODO: Get Gaudi 1 benchmark numbers
     DEPTH2IMG_GENERATION_LATENCY_BASELINE_BF16 = 200  # TODO: Get Gaudi 1 Throughput
 

From bc0f4d421472f968a336feed7bbba59ab61ef716 Mon Sep 17 00:00:00 2001
From: deepak-gowda-narayana
 <140652370+deepak-gowda-narayana@users.noreply.github.com>
Date: Wed, 30 Oct 2024 11:16:15 -0700
Subject: [PATCH 2/4] Add throughput warmup step argument as an optional
 parameter

---
 .../stable-diffusion/unconditional_image_generation.py    | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/examples/stable-diffusion/unconditional_image_generation.py b/examples/stable-diffusion/unconditional_image_generation.py
index baca71b6ba..8efaf8801f 100644
--- a/examples/stable-diffusion/unconditional_image_generation.py
+++ b/examples/stable-diffusion/unconditional_image_generation.py
@@ -79,6 +79,12 @@ def main():
         default="/tmp/",
         help="Where to save the generated images. The default is DDPMScheduler.",
     )
+    parser.add_argument(
+        "--throughput_warmup_steps",
+        type=int,
+        default=3,
+        help="Number of steps to ignore for throughput calculation.",
+    )
 
     args = parser.parse_args()
     model_name = args.model_name_or_path
@@ -100,6 +106,8 @@ def main():
         "gaudi_config": gaudi_config,
     }
 
+    kwargs_call = {"throughput_warmup_steps": args.throughput_warmup_steps}
+
     pipeline = GaudiDDPMPipeline.from_pretrained(model_name, **kwargs)
     output = pipeline(batch_size=args.batch_size, num_inference_steps=args.num_inference_steps)
 

From 9587e0cc4c012770b3aab45fb2cbfb3f76fdc5d3 Mon Sep 17 00:00:00 2001
From: deepak-gowda-narayana
 <140652370+deepak-gowda-narayana@users.noreply.github.com>
Date: Wed, 30 Oct 2024 11:16:52 -0700
Subject: [PATCH 3/4] Update throughput calculation with speed_metrics call

---
 .../diffusers/pipelines/ddpm/pipeline_ddpm.py | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py b/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py
index f26f483813..65a7df7e2d 100644
--- a/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py
+++ b/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py
@@ -29,6 +29,8 @@
 from optimum.habana.transformers.gaudi_configuration import GaudiConfig
 from optimum.utils import logging
 
+from ....utils import speed_metrics
+
 
 logger = logging.get_logger(__name__)
 
@@ -149,8 +151,14 @@ def __call__(
         if self.use_habana:
             self.unet = self.unet.to(self._device)
 
+        throughput_warmup_steps = kwargs.get("throughput_warmup_steps", 3)
+
         start_time = time.time()
+        time_after_warmup = start_time
         for i in self.progress_bar(num_inference_steps):
+            if i == throughput_warmup_steps:
+                time_after_warmup = time.time()
+
             timestep = timesteps[0]
             timesteps = torch.roll(timesteps, shifts=-1, dims=0)
 
@@ -172,7 +180,16 @@ def __call__(
         image = image.cpu().permute(0, 2, 3, 1).numpy()
         if output_type == "pil":
             image = self.numpy_to_pil(image)
-        end_time = time.time()
+
+        speed_metrics_prefix = "generation"
+        speed_measures = speed_metrics(
+            split=speed_metrics_prefix,
+            start_time=start_time,
+            num_samples=batch_size,
+            num_steps=batch_size * len(num_inference_steps),
+            start_time_after_warmup=time_after_warmup,
+        )
+        logger.info(f"Speed metrics: {speed_measures}")
 
         # Offload all models
         self.maybe_free_model_hooks()
@@ -180,5 +197,5 @@ def __call__(
         if not return_dict:
             return (image,)
 
-        throughput = batch_size / (end_time - start_time)
+        throughput = speed_measures["generation_samples_per_second"]
         return GaudiDDPMPipelineOutput(images=image, throughput=throughput)

From 6b8ad213ee79de591613b231bcb9798da6845270 Mon Sep 17 00:00:00 2001
From: deepak-gowda-narayana
 <140652370+deepak-gowda-narayana@users.noreply.github.com>
Date: Wed, 30 Oct 2024 12:23:26 -0700
Subject: [PATCH 4/4] Pass throughput_warmup_steps to the DDPM pipeline call

---
 examples/stable-diffusion/unconditional_image_generation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/stable-diffusion/unconditional_image_generation.py b/examples/stable-diffusion/unconditional_image_generation.py
index 8efaf8801f..36e35ff90f 100644
--- a/examples/stable-diffusion/unconditional_image_generation.py
+++ b/examples/stable-diffusion/unconditional_image_generation.py
@@ -109,7 +109,7 @@ def main():
     kwargs_call = {"throughput_warmup_steps": args.throughput_warmup_steps}
 
     pipeline = GaudiDDPMPipeline.from_pretrained(model_name, **kwargs)
-    output = pipeline(batch_size=args.batch_size, num_inference_steps=args.num_inference_steps)
+    output = pipeline(batch_size=args.batch_size, num_inference_steps=args.num_inference_steps, **kwargs_call)
 
     if args.output_dir:
         logger.info(f"Generating outputs to {args.output_dir}")