diff --git a/whisper/decoding.py b/whisper/decoding.py index c604631..ed8d900 100644 --- a/whisper/decoding.py +++ b/whisper/decoding.py @@ -615,7 +615,7 @@ class DecodingTask: n_audio: int = mel.shape[0] audio_features: Tensor = self._get_audio_features(mel) # encoder forward pass - tokens: Tensor = torch.tensor([self.initial_tokens]).expand(n_audio, -1) + tokens: Tensor = torch.tensor([self.initial_tokens]).repeat(n_audio, 1) # detect language if requested, overwriting the language token languages, language_probs = self._detect_language(audio_features, tokens)