From 382d233f0617b8a49ed719b495186115ae173490 Mon Sep 17 00:00:00 2001 From: "Lin, Fanli" Date: Tue, 30 Jul 2024 13:12:27 -0400 Subject: [PATCH 1/6] fix --- src/transformers/models/whisper/generation_whisper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/whisper/generation_whisper.py b/src/transformers/models/whisper/generation_whisper.py index df9689b59788..bbf8f219973d 100644 --- a/src/transformers/models/whisper/generation_whisper.py +++ b/src/transformers/models/whisper/generation_whisper.py @@ -1625,7 +1625,7 @@ def _prepare_decoder_input_ids( kwargs["decoder_attention_mask"] = decoder_input_ids != generation_config.pad_token_id elif prompt_ids is not None: - prev_tokens = prompt_ids[None].repeat(decoder_input_ids.shape[0], 1) + prev_tokens = prompt_ids[None].repeat(decoder_input_ids.shape[0], 1).to(decoder_input_ids.device) decoder_input_ids = torch.cat([prev_tokens, decoder_input_ids], dim=-1) # make sure `"decoder_attention_mask"` is not passed to forward kwargs.pop("decoder_attention_mask", None) From 3d666f1a5bdde95e918fccb96e0c07df76510021 Mon Sep 17 00:00:00 2001 From: "Lin, Fanli" Date: Tue, 30 Jul 2024 13:23:35 -0400 Subject: [PATCH 2/6] enable on xpu --- tests/pipelines/test_pipelines_automatic_speech_recognition.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/pipelines/test_pipelines_automatic_speech_recognition.py b/tests/pipelines/test_pipelines_automatic_speech_recognition.py index d8810f67eec1..52fd08432d9a 100644 --- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py +++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py @@ -1496,7 +1496,7 @@ def test_with_local_lm_fast(self): def test_whisper_prompted(self): processor = AutoProcessor.from_pretrained("openai/whisper-tiny") model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny") - model = model.to("cuda") + model = model.to(torch_device) pipe = pipeline( "automatic-speech-recognition", @@ -1506,7 +1506,6 @@ def test_whisper_prompted(self): max_new_tokens=128, chunk_length_s=30, batch_size=16, - device="cuda:0", ) dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation") From 3c7914e26bd6495f2d4db14be9aa61de4bce6eaa Mon Sep 17 00:00:00 2001 From: "Lin, Fanli" Date: Fri, 9 Aug 2024 13:25:40 -0400 Subject: [PATCH 3/6] no manual remove --- src/transformers/models/whisper/generation_whisper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/whisper/generation_whisper.py b/src/transformers/models/whisper/generation_whisper.py index 513ca6240453..3c4b5795e461 100644 --- a/src/transformers/models/whisper/generation_whisper.py +++ b/src/transformers/models/whisper/generation_whisper.py @@ -1673,7 +1673,7 @@ def _prepare_decoder_input_ids( kwargs["decoder_attention_mask"] = decoder_input_ids != generation_config.pad_token_id elif prompt_ids is not None: - prev_tokens = prompt_ids[None].repeat(decoder_input_ids.shape[0], 1).to(decoder_input_ids.device) + prev_tokens = prompt_ids[None].repeat(decoder_input_ids.shape[0], 1) decoder_input_ids = torch.cat([prev_tokens, decoder_input_ids], dim=-1) # make sure `"decoder_attention_mask"` is not passed to forward kwargs.pop("decoder_attention_mask", None) From 424ff9635b90c06a2c5f1613a95b0e6de9a18b35 Mon Sep 17 00:00:00 2001 From: "Lin, Fanli" Date: Fri, 9 Aug 2024 13:47:33 -0400 Subject: [PATCH 4/6] move to device --- tests/pipelines/test_pipelines_automatic_speech_recognition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipelines/test_pipelines_automatic_speech_recognition.py b/tests/pipelines/test_pipelines_automatic_speech_recognition.py index 85a7c8accd07..abb07d831ad0 100644 --- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py +++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py @@ -1530,7 +1530,7 @@ def test_whisper_prompted(self): # prompt the model to misspell "Mr Quilter" as "Mr Quillter" whisper_prompt = "Mr. Quillter." - prompt_ids = pipe.tokenizer.get_prompt_ids(whisper_prompt, return_tensors="pt") + prompt_ids = pipe.tokenizer.get_prompt_ids(whisper_prompt, return_tensors="pt").to(torch_device) unprompted_result = pipe(sample.copy())["text"] prompted_result = pipe(sample, generate_kwargs={"prompt_ids": prompt_ids})["text"] From 1c16f6ede8c8c7c376dec915679c9d88435487b8 Mon Sep 17 00:00:00 2001 From: "Lin, Fanli" Date: Fri, 9 Aug 2024 14:33:36 -0400 Subject: [PATCH 5/6] remove to --- tests/pipelines/test_pipelines_automatic_speech_recognition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipelines/test_pipelines_automatic_speech_recognition.py b/tests/pipelines/test_pipelines_automatic_speech_recognition.py index abb07d831ad0..85a7c8accd07 100644 --- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py +++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py @@ -1530,7 +1530,7 @@ def test_whisper_prompted(self): # prompt the model to misspell "Mr Quilter" as "Mr Quillter" whisper_prompt = "Mr. Quillter." - prompt_ids = pipe.tokenizer.get_prompt_ids(whisper_prompt, return_tensors="pt").to(torch_device) + prompt_ids = pipe.tokenizer.get_prompt_ids(whisper_prompt, return_tensors="pt") unprompted_result = pipe(sample.copy())["text"] prompted_result = pipe(sample, generate_kwargs={"prompt_ids": prompt_ids})["text"] From 45f9c8f3bf9e9a3733c964f90b2dcdf7221fe951 Mon Sep 17 00:00:00 2001 From: "Lin, Fanli" Date: Tue, 13 Aug 2024 15:17:04 -0400 Subject: [PATCH 6/6] add move to --- tests/pipelines/test_pipelines_automatic_speech_recognition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipelines/test_pipelines_automatic_speech_recognition.py b/tests/pipelines/test_pipelines_automatic_speech_recognition.py index 85a7c8accd07..abb07d831ad0 100644 --- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py +++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py @@ -1530,7 +1530,7 @@ def test_whisper_prompted(self): # prompt the model to misspell "Mr Quilter" as "Mr Quillter" whisper_prompt = "Mr. Quillter." - prompt_ids = pipe.tokenizer.get_prompt_ids(whisper_prompt, return_tensors="pt") + prompt_ids = pipe.tokenizer.get_prompt_ids(whisper_prompt, return_tensors="pt").to(torch_device) unprompted_result = pipe(sample.copy())["text"] prompted_result = pipe(sample, generate_kwargs={"prompt_ids": prompt_ids})["text"]