Commit 4d45312

Merge branch 'huggingface:main' into pr1280_fix
2 parents: 2c3d979 + 82a1c96

16 files changed, +300 -28 lines

README.md (+1)

```diff
@@ -232,6 +232,7 @@ The following model architectures, tasks and device distributions have been validated
 | ClipSeg | | <div style="text-align:left"><li>Single card</li></div> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
 | Llava / Llava-next | | <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
 | idefics2 | <div style="text-align:left"><li>LoRA</li></div> | <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
+| Paligemma | | <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
 | Segment Anything Model | | <div style="text-align:left"><li>Single card</li></div> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
 | VideoMAE | | <div style="text-align:left"><li>Single card</li></div> | <li>[Video classification](https://github.com/huggingface/optimum-habana/tree/main/examples/video-classification)</li> |
 | TableTransformer | | <div style="text-align:left"><li>Single card</li></div> | <li>[table object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/table-detection) </li> |
```

docs/source/index.mdx (+1)

```diff
@@ -77,6 +77,7 @@ In the tables below, ✅ means single-card, multi-card and DeepSpeed have all be
 | OWLViT | | <div style="text-align:left"><li>Single card</li></div> | <li>[zero shot object detection](https://github.com/huggingface/optimum-habana/tree/main/examples/zero-shot-object-detection)</li> |
 | ClipSeg | | <div style="text-align:left"><li>Single card</li></div> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
 | Llava / Llava-next | | <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
+| Paligemma | | <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
 | idefics2 | <div style="text-align:left"><li>LoRA</li></div> | <div style="text-align:left"><li>Single card</li></div> | <li>[image to text](https://github.com/huggingface/optimum-habana/tree/main/examples/image-to-text)</li> |
 | SAM | | <div style="text-align:left"><li>Single card</li></div> | <li>[object segmentation](https://github.com/huggingface/optimum-habana/tree/main/examples/object-segementation)</li> |
 | VideoMAE | | <div style="text-align:left"><li>Single card</li></div> | <li>[Video classification](https://github.com/huggingface/optimum-habana/tree/main/examples/video-classification)</li> |
```

examples/image-to-text/README.md (+10, -3)

````diff
@@ -32,6 +32,7 @@ Models that have been validated:
 - [llava-hf/llama3-llava-next-8b-hf](https://huggingface.co/llava-hf/llama3-llava-next-8b-hf)
 - [HuggingFaceM4/idefics2-8b](https://huggingface.co/HuggingFaceM4/idefics2-8b)
 - [meta-llama/Llama-3.2-11B-Vision-Instruct](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct)
+- [google/paligemma-3b-mix-224](https://huggingface.co/google/paligemma-3b-mix-224)
 
 ### Inference with BF16
 
@@ -77,16 +78,22 @@ python3 run_pipeline.py \
 ```
 
 To run Llava-hf/llava-v1.6-34b-hf inference, use the following command:
-
 ```bash
 python3 run_pipeline.py \
     --model_name_or_path llava-hf/llava-v1.6-34b-hf \
     --use_hpu_graphs \
     --bf16
 ```
 
-To run Llava-hf/llama3-llava-next-8b-hf inference, use the following command:
+To run google/paligemma-3b-mix-224 inference, use the following command:
+```bash
+python3 run_pipeline.py \
+    --model_name_or_path google/paligemma-3b-mix-224 \
+    --use_hpu_graphs \
+    --bf16
+```
 
+To run Llava-hf/llama3-llava-next-8b-hf inference, use the following command:
 ```bash
 python3 run_pipeline.py \
     --model_name_or_path llava-hf/llama3-llava-next-8b-hf \
@@ -405,4 +412,4 @@ QUANT_CONFIG=./quantization_config/maxabs_quant.json PT_HPU_ENABLE_LAZY_COLLECTI
     --bf16 \
     --use_flash_attention \
     --flash_attention_recompute
-```
+```
````

examples/image-to-text/run_pipeline.py (+11, -3)

```diff
@@ -187,11 +187,16 @@ def main():
     model_type = AutoConfig.from_pretrained(args.model_name_or_path).model_type
     if args.image_path is None and model_type in ["llava", "idefics2", "mllama"]:
         args.image_path = ["https://llava-vl.github.io/static/images/view.jpg"]
+    elif args.image_path is None and model_type == "paligemma":
+        args.image_path = [
+            "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"
+        ]
     elif args.image_path is None and model_type == "llava_next":
         args.image_path = [
             "https://github.com/haotian-liu/LLaVA/blob/1a91fc274d7c35a9b50b3cb29c4247ae5837ce39/images/llava_v1_5_radar.jpg?raw=true"
         ]
-    if args.prompt is None and model_type in ["llava", "idefics2", "llava_next", "mllama"]:
+
+    if args.prompt is None and model_type in ["llava", "idefics2", "llava_next", "mllama", "paligemma"]:
         processor = AutoProcessor.from_pretrained(args.model_name_or_path)
         conversation = [
             {
@@ -202,7 +207,10 @@ def main():
                 ],
             }
         ]
-        args.prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
+        if model_type == "paligemma":
+            args.prompt = "caption es"
+        else:
+            args.prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
 
     image_paths = args.image_path
     image_paths_len = len(image_paths)
@@ -276,7 +284,7 @@ def main():
         htcore.hpu_initialize(generator.model)
 
     # delete once pipeline integrate AutoProcessor as preprocess engine
-    if model_type in ["idefics2", "mllama"]:
+    if model_type in ["idefics2", "mllama", "paligemma"]:
         from transformers.image_utils import load_image
 
         def preprocess(self, image, prompt=None, timeout=None):
```

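PaliGemma is prompted with plain task prefixes (here `caption es`, Spanish captioning) rather than a chat template, which is why the default-prompt logic above special-cases it. The last hunk shows only the signature of the `preprocess` override that routes these models' images through `AutoProcessor`; as a rough, hypothetical sketch of what such an override does (only the signature and the `load_image` import appear in the diff):

```python
# Hypothetical body for the preprocess override; only its signature and the
# load_image import are shown in the diff above.
from transformers.image_utils import load_image


def preprocess(self, image, prompt=None, timeout=None):
    # load_image accepts a URL, a local path, or a PIL image
    image = load_image(image, timeout=timeout)
    # Run the model's AutoProcessor to get tensors the model can consume
    return self.processor(images=image, text=prompt, return_tensors="pt")
```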
examples/stable-diffusion/text_to_image_generation.py (+24, -1)

```diff
@@ -286,8 +286,12 @@ def main():
         action="store_true",
         help="Use rescale_betas_zero_snr for controlling image brightness",
     )
+    parser.add_argument("--optimize", action="store_true", help="Use optimized pipeline.")
     args = parser.parse_args()
 
+    if args.optimize and not args.use_habana:
+        raise ValueError("--optimize can only be used with --use-habana.")
+
     # Select stable diffuson pipeline based on input
     sdxl_models = ["stable-diffusion-xl", "sdxl"]
     sd3_models = ["stable-diffusion-3"]
@@ -302,6 +306,8 @@ def main():
         scheduler = GaudiEulerDiscreteScheduler.from_pretrained(
             args.model_name_or_path, subfolder="scheduler", **kwargs
         )
+        if args.optimize:
+            scheduler.hpu_opt = True
     elif args.scheduler == "euler_ancestral_discrete":
         scheduler = GaudiEulerAncestralDiscreteScheduler.from_pretrained(
             args.model_name_or_path, subfolder="scheduler", **kwargs
@@ -417,14 +423,31 @@ def main():
 
             pipeline = AutoPipelineForInpainting.from_pretrained(args.model_name_or_path, **kwargs)
 
-        else:
+        elif args.optimize:
             # Import SDXL pipeline
+            import habana_frameworks.torch.hpu as torch_hpu
+
+            from optimum.habana.diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_mlperf import (
+                StableDiffusionXLPipeline_HPU,
+            )
+
+            pipeline = StableDiffusionXLPipeline_HPU.from_pretrained(
+                args.model_name_or_path,
+                **kwargs,
+            )
+
+            pipeline.to(torch.device("hpu"))
+            pipeline.unet.set_default_attn_processor(pipeline.unet)
+            if args.use_hpu_graphs:
+                pipeline.unet = torch_hpu.wrap_in_hpu_graph(pipeline.unet)
+        else:
             from optimum.habana.diffusers import GaudiStableDiffusionXLPipeline
 
             pipeline = GaudiStableDiffusionXLPipeline.from_pretrained(
                 args.model_name_or_path,
                 **kwargs,
             )
+
         if args.lora_id:
             pipeline.load_lora_weights(args.lora_id)
 
```

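Pulled out of the argument-parsing scaffolding, the `--optimize` branch boils down to the following. This is a condensed sketch assuming a Gaudi machine with `habana_frameworks` installed; the checkpoint name is only an example:

```python
# Condensed sketch of the --optimize path; requires Gaudi hardware with
# habana_frameworks available.
import torch
import habana_frameworks.torch.hpu as torch_hpu

from optimum.habana.diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_mlperf import (
    StableDiffusionXLPipeline_HPU,
)

# Example model id; any SDXL checkpoint should work here
pipeline = StableDiffusionXLPipeline_HPU.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipeline.to(torch.device("hpu"))
pipeline.unet.set_default_attn_processor(pipeline.unet)
# Wrapping the UNet in an HPU graph trades memory for lower per-step launch overhead
pipeline.unet = torch_hpu.wrap_in_hpu_graph(pipeline.unet)
```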
optimum/habana/diffusers/models/attention_processor.py (+11, -7)

```diff
@@ -19,7 +19,7 @@
 import torch
 import torch.nn.functional as F
 from diffusers.models.attention_processor import Attention
-from diffusers.utils import USE_PEFT_BACKEND, logging
+from diffusers.utils import deprecate, logging
 from diffusers.utils.import_utils import is_xformers_available
 from torch import nn
 
@@ -107,8 +107,13 @@ def __call__(
         encoder_hidden_states: Optional[torch.FloatTensor] = None,
         attention_mask: Optional[torch.FloatTensor] = None,
         temb: Optional[torch.FloatTensor] = None,
-        scale: float = 1.0,
+        *args,
+        **kwargs,
     ) -> torch.FloatTensor:
+        if len(args) > 0 or kwargs.get("scale", None) is not None:
+            deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`."
+            deprecate("scale", "1.0.0", deprecation_message)
+
         residual = hidden_states
         if attn.spatial_norm is not None:
             hidden_states = attn.spatial_norm(hidden_states, temb)
@@ -132,16 +137,15 @@ def __call__(
         if attn.group_norm is not None:
             hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)
 
-        args = () if USE_PEFT_BACKEND else (scale,)
-        query = attn.to_q(hidden_states, *args)
+        query = attn.to_q(hidden_states)
 
         if encoder_hidden_states is None:
             encoder_hidden_states = hidden_states
         elif attn.norm_cross:
             encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)
 
-        key = attn.to_k(encoder_hidden_states, *args)
-        value = attn.to_v(encoder_hidden_states, *args)
+        key = attn.to_k(encoder_hidden_states)
+        value = attn.to_v(encoder_hidden_states)
 
         inner_dim = key.shape[-1]
         head_dim = inner_dim // attn.heads
@@ -171,7 +175,7 @@ def __call__(
         hidden_states = hidden_states.to(query.dtype)
 
         # linear proj
-        hidden_states = attn.to_out[0](hidden_states, *args)
+        hidden_states = attn.to_out[0](hidden_states)
         # dropout
         hidden_states = attn.to_out[1](hidden_states)
 
```

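This tracks the corresponding upstream diffusers change: with the PEFT backend, LoRA scaling happens inside the projection layers themselves, so `to_q`/`to_k`/`to_v`/`to_out` no longer take a positional `scale`. The signature keeps `*args, **kwargs` only to warn old callers; in isolation the guard pattern looks like this (a sketch using diffusers' `deprecate` helper):

```python
# Isolated sketch of the deprecation guard used in the diff above.
from diffusers.utils import deprecate


def attention_call(*args, **kwargs):
    # Old callers may still pass `scale` positionally or as a keyword;
    # warn once and otherwise ignore it.
    if len(args) > 0 or kwargs.get("scale", None) is not None:
        deprecate("scale", "1.0.0", "The `scale` argument is deprecated and will be ignored.")
```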
optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_mlperf.py (+4, -2)

```diff
@@ -170,6 +170,7 @@ def __init__(
         )
         self.unet.set_default_attn_processor = set_default_attn_processor_hpu
         self.unet.forward = gaudi_unet_2d_condition_model_forward
+        self.quantized = False
 
     def run_unet(
         self,
@@ -609,7 +610,6 @@ def __call__(
 
         self._num_timesteps = len(timesteps)
         with self.progress_bar(total=num_inference_steps) as progress_bar:
-            timesteps = [t.item() for t in timesteps]
             if self.quantized:
                 for i, t in enumerate(timesteps[0:-2]):
                     if self.interrupt:
@@ -666,7 +666,9 @@ def __call__(
                         )
                     hb_profiler.step()
             else:
-                for i, t in enumerate(timesteps):
+                for i in range(num_inference_steps):
+                    t = timesteps[0]
+                    timesteps = torch.roll(timesteps, shifts=-1, dims=0)
                     if self.interrupt:
                         continue
                     latents = self.run_unet(
```

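The rewritten loop stops materializing timesteps as Python scalars (the removed `t.item()` comprehension forced a host sync) and instead keeps the tensor on device, rotating it with `torch.roll` so that index 0 always holds the current step; presumably this keeps shapes static across iterations for HPU graph replay. A minimal, self-contained illustration of the rotation pattern:

```python
# Minimal illustration of the timestep-rotation loop from the diff above.
import torch

timesteps = torch.tensor([999, 749, 499, 249])  # example schedule
num_inference_steps = timesteps.shape[0]
for i in range(num_inference_steps):
    t = timesteps[0]                                      # current step, still a tensor
    timesteps = torch.roll(timesteps, shifts=-1, dims=0)  # rotate for the next iteration
    print(i, int(t))  # -> 0 999, 1 749, 2 499, 3 249
```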
optimum/habana/transformers/generation/utils.py (+1)

```diff
@@ -109,6 +109,7 @@
     "qwen2_moe",
     "xglm",
     "whisper",
+    "paligemma",
     "idefics2",
     "mllama",
 ]
```

optimum/habana/transformers/modeling_utils.py (+6)

```diff
@@ -108,6 +108,7 @@
     GaudiMptModel,
     GaudiOPTForCausalLM,
     GaudiOPTLearnedPositionalEmbedding,
+    GaudiPaliGemmaForConditionalGeneration,
     GaudiPersimmonAttention,
     GaudiPersimmonDecoderLayer,
     GaudiPersimmonForCausalLM,
@@ -436,6 +437,11 @@ def adapt_transformers_to_gaudi():
         GaudiLlavaNextForConditionalGeneration
     )
 
+    # Optimization for paligemma on Gaudi
+    transformers.models.paligemma.modeling_paligemma.PaliGemmaForConditionalGeneration = (
+        GaudiPaliGemmaForConditionalGeneration
+    )
+
     # Optimization for idefics2 on Gaudi
     transformers.models.idefics2.modeling_idefics2.Idefics2ForConditionalGeneration = (
         GaudiIdefics2ForConditionalGeneration
```

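`adapt_transformers_to_gaudi()` works by rebinding classes on the `transformers` module tree, so later `from_pretrained()` calls resolve to the Gaudi subclass without any change to user code. In miniature, the PaliGemma registration above amounts to:

```python
# What the registration does at import time: after this assignment,
# transformers' Auto* factories construct the Gaudi-optimized class.
import transformers
from optimum.habana.transformers.models import GaudiPaliGemmaForConditionalGeneration

transformers.models.paligemma.modeling_paligemma.PaliGemmaForConditionalGeneration = (
    GaudiPaliGemmaForConditionalGeneration
)
```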
optimum/habana/transformers/models/__init__.py (+1)

```diff
@@ -189,6 +189,7 @@
     gaudi_opt_model_forward,
 )
 from .owlvit import gaudi_owlvitclasspredictionhead_forward
+from .paligemma import GaudiPaliGemmaForConditionalGeneration
 from .persimmon import (
     GaudiPersimmonAttention,
     GaudiPersimmonDecoderLayer,
```

optimum/habana/transformers/models/gemma/modeling_gemma.py (+8, -7)

```diff
@@ -135,7 +135,7 @@ def forward(self, cur, dim, idx):
 class GaudiGemmaAttention(GemmaAttention):
     def __init__(self, config: GemmaConfig, layer_idx: Optional[int] = None):
         super().__init__(config, layer_idx)
-
+        config.rope_scaling = config.rope_scaling if hasattr(config, "rope_scaling") else None
         self.matmul_qk = Matmul()
         self.matmul_av = Matmul()
         self.k_cache = KVCache()
@@ -605,12 +605,13 @@ def forward(
             position_ids = position_ids.unsqueeze(0)
 
         # HPU specific mask generation
-        attention_mask = _gaudi_prepare_4d_causal_attention_mask(
-            attention_mask,
-            input_ids.shape if input_ids is not None else (batch_size, seq_length),
-            inputs_embeds,
-            past_seen_tokens,
-        )
+        if attention_mask.dim() != 4:
+            attention_mask = _gaudi_prepare_4d_causal_attention_mask(
+                attention_mask,
+                input_ids.shape if input_ids is not None else (batch_size, seq_length),
+                inputs_embeds,
+                past_seen_tokens,
+            )
         # embed positions
         hidden_states = inputs_embeds
 
```

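The new guard skips Gemma's own causal-mask construction when the caller already supplies an expanded 4D mask, presumably because PaliGemma builds its own mask over the image-prefix tokens before handing off to the language model; plain 2D padding masks are expanded as before. An illustrative sketch of the shape distinction (values are examples, not from the diff):

```python
# Illustrative shapes for the dim() guard above; example values only.
import torch

batch, q_len, kv_len = 2, 16, 16
mask_2d = torch.ones(batch, q_len)              # tokenizer padding mask: needs 4D expansion
mask_4d = torch.zeros(batch, 1, q_len, kv_len)  # caller-built additive mask: used as-is

for mask in (mask_2d, mask_4d):
    if mask.dim() != 4:
        print("expand via _gaudi_prepare_4d_causal_attention_mask")
    else:
        print("use the provided 4D mask directly")
```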
optimum/habana/transformers/models/paligemma/__init__.py (new file, +1)

```diff
@@ -0,0 +1 @@
+from .modeling_paligemma import GaudiPaliGemmaForConditionalGeneration
```
