From 6df3ea1fb55dc75ce26f4f73e75d8644ca396414 Mon Sep 17 00:00:00 2001 From: Markus Hennerbichler Date: Tue, 17 Jan 2023 07:46:15 +0000 Subject: [PATCH] Support batch-dimension in log_mel_spectrogram (#839) --- whisper/audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whisper/audio.py b/whisper/audio.py index a3d8a13..de8a195 100644 --- a/whisper/audio.py +++ b/whisper/audio.py @@ -113,7 +113,7 @@ def log_mel_spectrogram(audio: Union[str, np.ndarray, torch.Tensor], n_mels: int window = torch.hann_window(N_FFT).to(audio.device) stft = torch.stft(audio, N_FFT, HOP_LENGTH, window=window, return_complex=True) - magnitudes = stft[:, :-1].abs() ** 2 + magnitudes = stft[..., :-1].abs() ** 2 filters = mel_filters(audio.device, n_mels) mel_spec = filters @ magnitudes