From 5ecf513ca4dbb00dfd606bb18ed1bb2aebf17d64 Mon Sep 17 00:00:00 2001 From: Dmitry Rogozhkin Date: Tue, 11 Jun 2024 16:33:52 +0000 Subject: [PATCH] xpu: enable gpt2 and decision_transformer tests for xpu pytorch backend Note that running xpu tests requires TRANSFORMERS_TEST_DEVICE_SPEC=spec.py passed to the test runner: import torch DEVICE_NAME = 'xpu' MANUAL_SEED_FN = torch.xpu.manual_seed EMPTY_CACHE_FN = torch.xpu.empty_cache DEVICE_COUNT_FN = torch.xpu.device_count Signed-off-by: Dmitry Rogozhkin --- .../decision_transformer/modeling_decision_transformer.py | 3 +-- src/transformers/models/gpt2/modeling_gpt2.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/transformers/models/decision_transformer/modeling_decision_transformer.py b/src/transformers/models/decision_transformer/modeling_decision_transformer.py index 236efb1d2219..b8eb9f5a8b42 100755 --- a/src/transformers/models/decision_transformer/modeling_decision_transformer.py +++ b/src/transformers/models/decision_transformer/modeling_decision_transformer.py @@ -22,7 +22,6 @@ import torch import torch.utils.checkpoint from torch import nn -from torch.cuda.amp import autocast from ...activations import ACT2FN from ...modeling_outputs import BaseModelOutputWithPastAndCrossAttentions @@ -219,7 +218,7 @@ def _upcast_and_reordered_attn(self, query, key, value, attention_mask=None, hea scale_factor /= float(self.layer_idx + 1) # Upcast (turn off autocast) and reorder (Scale K by 1 / root(dk)) - with autocast(enabled=False): + with torch.amp.autocast(query.device.type, enabled=False): q, k = query.reshape(-1, q_seq_len, dk), key.transpose(-1, -2).reshape(-1, dk, k_seq_len) attn_weights = torch.baddbmm(attn_weights, q.float(), k.float(), beta=0, alpha=scale_factor) attn_weights = attn_weights.reshape(bsz, num_heads, q_seq_len, k_seq_len) diff --git a/src/transformers/models/gpt2/modeling_gpt2.py b/src/transformers/models/gpt2/modeling_gpt2.py index 79e5a27d3c67..7c62d06949d4 100644 --- a/src/transformers/models/gpt2/modeling_gpt2.py +++ b/src/transformers/models/gpt2/modeling_gpt2.py @@ -25,7 +25,6 @@ import torch.nn.functional as F import torch.utils.checkpoint from torch import nn -from torch.cuda.amp import autocast from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss from ...activations import ACT2FN @@ -249,7 +248,7 @@ def _upcast_and_reordered_attn(self, query, key, value, attention_mask=None, hea scale_factor /= float(self.layer_idx + 1) # Upcast (turn off autocast) and reorder (Scale K by 1 / root(dk)) - with autocast(enabled=False): + with torch.amp.autocast(query.device.type, enabled=False): q, k = query.reshape(-1, q_seq_len, dk), key.transpose(-1, -2).reshape(-1, dk, k_seq_len) attn_weights = torch.baddbmm(attn_weights, q.float(), k.float(), beta=0, alpha=scale_factor) attn_weights = attn_weights.reshape(bsz, num_heads, q_seq_len, k_seq_len)