Merge ccacb7f5defb139e77a553c2114cf19c63084962 into 173ff7dd1d9fb1c4fddea0d41d704cfefeb8908c

2025-11-26 23:46:09 +00:00 · 2024-11-26 20:45:46 +03:30 · 2024-11-26 20:45:46 +03:30 · 369480a232
commit 369480a232
parent 173ff7dd1d ccacb7f5de
1 changed files with 2 additions and 2 deletions
--- a/whisper/audio.py
+++ b/whisper/audio.py
@@ -122,7 +122,7 @@ def log_mel_spectrogram(
        The path to audio or either a NumPy array or Tensor containing the audio waveform in 16 kHz

    n_mels: int
-        The number of Mel-frequency filters, only 80 is supported
+        The number of Mel-frequency filters; only 80 and 128 are supported

    padding: int
        Number of zero samples to pad to the right
@@ -132,7 +132,7 @@ def log_mel_spectrogram(

    Returns
    -------
-    torch.Tensor, shape = (80, n_frames)
+    torch.Tensor, shape = (80, n_frames) or (128, n_frames), depending on the value of n_mels
        A Tensor that contains the Mel spectrogram
    """
    if not torch.is_tensor(audio):