diff --git a/examples/stable-diffusion/unconditional_image_generation.py b/examples/stable-diffusion/unconditional_image_generation.py index baca71b6ba..36e35ff90f 100644 --- a/examples/stable-diffusion/unconditional_image_generation.py +++ b/examples/stable-diffusion/unconditional_image_generation.py @@ -79,6 +79,12 @@ def main(): default="/tmp/", help="Where to save the generated images. The default is DDPMScheduler.", ) + parser.add_argument( + "--throughput_warmup_steps", + type=int, + default=3, + help="Number of steps to ignore for throughput calculation.", + ) args = parser.parse_args() model_name = args.model_name_or_path @@ -100,8 +106,10 @@ def main(): "gaudi_config": gaudi_config, } + kwargs_call = {"throughput_warmup_steps": args.throughput_warmup_steps} + pipeline = GaudiDDPMPipeline.from_pretrained(model_name, **kwargs) - output = pipeline(batch_size=args.batch_size, num_inference_steps=args.num_inference_steps) + output = pipeline(batch_size=args.batch_size, num_inference_steps=args.num_inference_steps, **kwargs_call) if args.output_dir: logger.info(f"Generating outputs to {args.output_dir}") diff --git a/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py b/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py index 7b3ea5afdb..65a7df7e2d 100644 --- a/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py +++ b/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py @@ -29,6 +29,8 @@ from optimum.habana.transformers.gaudi_configuration import GaudiConfig from optimum.utils import logging +from ....utils import speed_metrics + logger = logging.get_logger(__name__) @@ -149,8 +151,14 @@ def __call__( if self.use_habana: self.unet = self.unet.to(self._device) + throughput_warmup_steps = kwargs.get("throughput_warmup_steps", 3) + start_time = time.time() + time_after_warmup = start_time for i in self.progress_bar(num_inference_steps): + if i == throughput_warmup_steps: + time_after_warmup = time.time() + timestep = timesteps[0] timesteps = torch.roll(timesteps, shifts=-1, dims=0) @@ -172,7 +180,16 @@ def __call__( image = image.cpu().permute(0, 2, 3, 1).numpy() if output_type == "pil": image = self.numpy_to_pil(image) - end_time = time.time() + + speed_metrics_prefix = "generation" + speed_measures = speed_metrics( + split=speed_metrics_prefix, + start_time=start_time, + num_samples=batch_size, + num_steps=batch_size * len(num_inference_steps), + start_time_after_warmup=time_after_warmup, + ) + logger.info(f"Speed metrics: {speed_measures}") # Offload all models self.maybe_free_model_hooks() @@ -180,5 +197,5 @@ def __call__( if not return_dict: return (image,) - throughput = (end_time - start_time) / batch_size + throughput = speed_measures["generation_samples_per_second"] return GaudiDDPMPipelineOutput(images=image, throughput=throughput) diff --git a/tests/test_diffusers.py b/tests/test_diffusers.py index 35556c57bf..e4c61b7cda 100755 --- a/tests/test_diffusers.py +++ b/tests/test_diffusers.py @@ -136,7 +136,7 @@ INPAINT_XL_THROUGHPUT_BASELINE_BF16 = 1.151 TEXT_TO_VIDEO_SYNTHESIS_BF16_BASELINE = 70 DETERMINISTIC_IMAGE_GENERATION_THROUGHPUT = 0.946 - THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 7.671212047338486 + THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 0.15186785472532677 DEPTH2IMG_GENERATION_LATENCY_BASELINE_BF16 = 36.06376791000366 else: THROUGHPUT_BASELINE_BF16 = 0.309 @@ -148,7 +148,7 @@ INPAINT_THROUGHPUT_BASELINE_BF16 = 1.42 INPAINT_XL_THROUGHPUT_BASELINE_BF16 = 0.271 DETERMINISTIC_IMAGE_GENERATION_THROUGHPUT = 0.302 - THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 3.095533166996529 + THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 0.050208662346013566 TEXT_TO_VIDEO_SYNTHESIS_BF16_BASELINE = 1000 # TODO: Get Gaudi 1 benchmark numbers DEPTH2IMG_GENERATION_LATENCY_BASELINE_BF16 = 200 # TODO: Get Gaudi 1 Throughput