From 21010ef454fb25954b0914785180311fb077add9 Mon Sep 17 00:00:00 2001
From: sqhao <haoshengqiang79@163.com>
Date: Tue, 19 Sep 2023 07:09:59 +0800
Subject: [PATCH] Fix documented shape of xa in TextDecoder.forward docstring (#1526)

Signed-off-by: haoshengqiang <haoshengqiang@xiaohongshu.com>
Co-authored-by: haoshengqiang <haoshengqiang@xiaohongshu.com>
---
 whisper/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/whisper/model.py b/whisper/model.py
index 3457fcfc6..69130022a 100644
--- a/whisper/model.py
+++ b/whisper/model.py
@@ -197,7 +197,7 @@ def forward(self, x: Tensor, xa: Tensor, kv_cache: Optional[dict] = None):
         """
         x : torch.LongTensor, shape = (batch_size, <= n_ctx)
             the text tokens
-        xa : torch.Tensor, shape = (batch_size, n_mels, n_audio_ctx)
+        xa : torch.Tensor, shape = (batch_size, n_audio_ctx, n_audio_state)
             the encoded audio features to be attended on
         """
         offset = next(iter(kv_cache.values())).shape[1] if kv_cache else 0