mirror of
https://github.com/openai/whisper.git
synced 2025-07-02 02:12:29 +00:00

* word-level timestamps in `transcribe()`
* moving to `timing.py`
* numba implementation for dtw, replacing dtw-python
* triton implementation for dtw
* add test for dtw implementations
* triton implementation of median_filter
* a simple word-level timestamps test
* add scipy as dev dependency
* installs an older version of Triton if CUDA < 11.4
* fix broken merge
* loosen nvcc version match regex
* find_alignment() function
* miscellaneous improvements
* skip median filtering when the input is too small
* Expose punctuation options in cli and transcribe() (#973)
* fix merge error
* fix merge error 2
* annotating that word_timestamps is experimental

---------

Co-authored-by: ryanheise <ryan@ryanheise.com>
35 lines
1.1 KiB
Python
35 lines
1.1 KiB
Python
import os
|
|
|
|
import pytest
|
|
import torch
|
|
|
|
import whisper
|
|
|
|
|
|
@pytest.mark.parametrize("model_name", whisper.available_models())
def test_transcribe(model_name: str):
    """Transcribe the bundled ``jfk.flac`` clip and sanity-check the result.

    Runs once for every name returned by ``whisper.available_models()``.
    The test passes when:

    * the reported language is ``"en"``;
    * the lower-cased text contains three expected phrases;
    * every word timing has ``start < end``;
    * at least one word equal to ``"Americans"`` (after stripping spaces
      and commas) has a timing interval that contains 1.8
      (presumably seconds -- confirm against the timing code).
    """
    # Use the GPU when one is available, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    model = whisper.load_model(model_name).to(device)

    # The audio fixture is resolved relative to this test file's directory.
    tests_dir = os.path.dirname(__file__)
    audio_path = os.path.join(tests_dir, "jfk.flac")

    # Names ending in ".en" are given an explicit language; all others pass
    # None (presumably leaving the choice to transcribe() -- verify there).
    language = None
    if model_name.endswith(".en"):
        language = "en"

    result = model.transcribe(
        audio_path,
        language=language,
        temperature=0.0,
        word_timestamps=True,
    )
    assert result["language"] == "en"

    transcription = result["text"].lower()
    for expected_phrase in ("my fellow americans", "your country", "do for you"):
        assert expected_phrase in transcription

    # Walk every word timing; the flag guards against the loop silently
    # never encountering the reference word.
    timing_checked = False
    for seg in result["segments"]:
        for word_timing in seg["words"]:
            assert word_timing["start"] < word_timing["end"]

            if word_timing["word"].strip(" ,") != "Americans":
                continue

            # The reference word must straddle the 1.8 mark.
            assert word_timing["start"] <= 1.8
            assert word_timing["end"] >= 1.8
            print(word_timing)
            timing_checked = True

    assert timing_checked
|