Mirror of https://github.com/openai/whisper.git (synced 2025-03-30 14:28:27 +00:00)
Updating README and doc strings to reflect that n_mels can now be 128 (#2049)
Commit fc5ded7d90 (parent 173ff7dd1d)
In the README usage example:

@@ -126,7 +126,7 @@ audio = whisper.load_audio("audio.mp3")
 audio = whisper.pad_or_trim(audio)
 
 # make log-Mel spectrogram and move to the same device as the model
-mel = whisper.log_mel_spectrogram(audio).to(model.device)
+mel = whisper.log_mel_spectrogram(audio, n_mels=model.dims.n_mels).to(model.device)
 
 # detect the spoken language
 _, probs = model.detect_language(mel)
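For context, the changed line sits in the README's lower-level usage walkthrough. Below is a minimal sketch of the surrounding flow after this change, assuming a local file named audio.mp3 and any installed checkpoint; model.dims.n_mels resolves to 80 or 128 depending on the checkpoint, so the same snippet works for either.

import whisper

model = whisper.load_model("base")

# load audio and pad/trim it to fit 30 seconds
audio = whisper.load_audio("audio.mp3")
audio = whisper.pad_or_trim(audio)

# make a log-Mel spectrogram with the model's own mel-bin count
# and move it to the same device as the model
mel = whisper.log_mel_spectrogram(audio, n_mels=model.dims.n_mels).to(model.device)

# detect the spoken language
_, probs = model.detect_language(mel)
print(f"Detected language: {max(probs, key=probs.get)}")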
In the log_mel_spectrogram docstring:

@@ -122,7 +122,7 @@ def log_mel_spectrogram(
         The path to audio or either a NumPy array or Tensor containing the audio waveform in 16 kHz
 
     n_mels: int
-        The number of Mel-frequency filters, only 80 is supported
+        The number of Mel-frequency filters, only 80 and 128 are supported
 
     padding: int
         Number of zero samples to pad to the right
@@ -132,7 +132,7 @@ def log_mel_spectrogram(
 
     Returns
     -------
-    torch.Tensor, shape = (80, n_frames)
+    torch.Tensor, shape = (n_mels, n_frames)
         A Tensor that contains the Mel spectrogram
     """
     if not torch.is_tensor(audio):
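The Returns change above reflects that the output height now follows the requested mel-bin count rather than being fixed at 80. A quick sketch to check this, assuming the bundled filter banks cover both supported values (80 and 128, as the updated docstring states):

import numpy as np
import whisper

# one second of silence at 16 kHz, just to probe the output shape
audio = np.zeros(16_000, dtype=np.float32)

for n_mels in (80, 128):
    mel = whisper.log_mel_spectrogram(audio, n_mels=n_mels)
    print(n_mels, tuple(mel.shape))  # (n_mels, n_frames)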