Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Corrected Throughput measure for GaudiDDPMPipeline #1460

Merged
merged 5 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion examples/stable-diffusion/unconditional_image_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ def main():
default="/tmp/",
help="Where to save the generated images. The default is /tmp/.",
)
parser.add_argument(
"--throughput_warmup_steps",
type=int,
default=3,
help="Number of steps to ignore for throughput calculation.",
)

args = parser.parse_args()
model_name = args.model_name_or_path
Expand All @@ -100,8 +106,10 @@ def main():
"gaudi_config": gaudi_config,
}

kwargs_call = {"throughput_warmup_steps": args.throughput_warmup_steps}

pipeline = GaudiDDPMPipeline.from_pretrained(model_name, **kwargs)
output = pipeline(batch_size=args.batch_size, num_inference_steps=args.num_inference_steps)
output = pipeline(batch_size=args.batch_size, num_inference_steps=args.num_inference_steps, **kwargs_call)

if args.output_dir:
logger.info(f"Generating outputs to {args.output_dir}")
Expand Down
21 changes: 19 additions & 2 deletions optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
from optimum.habana.transformers.gaudi_configuration import GaudiConfig
from optimum.utils import logging

from ....utils import speed_metrics


logger = logging.get_logger(__name__)

Expand Down Expand Up @@ -149,8 +151,14 @@ def __call__(
if self.use_habana:
self.unet = self.unet.to(self._device)

throughput_warmup_steps = kwargs.get("throughput_warmup_steps", 3)

start_time = time.time()
time_after_warmup = start_time
for i in self.progress_bar(num_inference_steps):
if i == throughput_warmup_steps:
time_after_warmup = time.time()

timestep = timesteps[0]
timesteps = torch.roll(timesteps, shifts=-1, dims=0)

Expand All @@ -172,13 +180,22 @@ def __call__(
image = image.cpu().permute(0, 2, 3, 1).numpy()
if output_type == "pil":
image = self.numpy_to_pil(image)
end_time = time.time()

speed_metrics_prefix = "generation"
speed_measures = speed_metrics(
split=speed_metrics_prefix,
start_time=start_time,
num_samples=batch_size,
num_steps=batch_size * len(num_inference_steps),
start_time_after_warmup=time_after_warmup,
)
logger.info(f"Speed metrics: {speed_measures}")

# Offload all models
self.maybe_free_model_hooks()

if not return_dict:
return (image,)

throughput = (end_time - start_time) / batch_size
throughput = speed_measures["generation_samples_per_second"]
return GaudiDDPMPipelineOutput(images=image, throughput=throughput)
4 changes: 2 additions & 2 deletions tests/test_diffusers.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@
INPAINT_XL_THROUGHPUT_BASELINE_BF16 = 1.151
TEXT_TO_VIDEO_SYNTHESIS_BF16_BASELINE = 70
DETERMINISTIC_IMAGE_GENERATION_THROUGHPUT = 0.946
THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 7.671212047338486
THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 0.15186785472532677
DEPTH2IMG_GENERATION_LATENCY_BASELINE_BF16 = 36.06376791000366
else:
THROUGHPUT_BASELINE_BF16 = 0.309
Expand All @@ -148,7 +148,7 @@
INPAINT_THROUGHPUT_BASELINE_BF16 = 1.42
INPAINT_XL_THROUGHPUT_BASELINE_BF16 = 0.271
DETERMINISTIC_IMAGE_GENERATION_THROUGHPUT = 0.302
THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 3.095533166996529
THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 0.050208662346013566
TEXT_TO_VIDEO_SYNTHESIS_BF16_BASELINE = 1000 # TODO: Get Gaudi 1 benchmark numbers
DEPTH2IMG_GENERATION_LATENCY_BASELINE_BF16 = 200 # TODO: Get Gaudi 1 Throughput

Expand Down
Loading