mirror of
https://github.com/openai/whisper.git
synced 2025-11-25 23:15:57 +00:00
56 lines
1.7 KiB
Python
56 lines
1.7 KiB
Python
import timeit
|
|
import whisper
|
|
from typing import Tuple
|
|
|
|
|
|
def load_model(model_name: str = "tiny.en") -> whisper.Whisper:
    """Load and return the Whisper model identified by *model_name*.

    The name is forwarded unchanged to ``whisper.load_model`` together with
    ``ext_feature_flag=False``.

    NOTE(review): ``ext_feature_flag`` does not look like a keyword of the
    upstream ``whisper.load_model`` signature -- confirm that the installed
    whisper build accepts it.
    """
    loaded_model = whisper.load_model(model_name, ext_feature_flag=False)
    return loaded_model
|
|
|
|
|
|
def transcribe(model: whisper.Whisper, audio_path: str) -> Tuple[str, float]:
    """Transcribe *audio_path* with *model* and time the call.

    Returns a ``(text, seconds)`` pair: the ``"text"`` entry of the mapping
    returned by ``model.transcribe`` (an empty string when that key is
    absent), and the duration measured with ``timeit.default_timer()``.
    """
    began = timeit.default_timer()
    result = model.transcribe(audio_path)
    # The lookup stays inside the timed window, as the measurement covers
    # everything up to obtaining the text.
    text = result.get("text", "")
    finished = timeit.default_timer()
    return text, finished - began
|
|
|
|
|
|
def calculate_wer(hypothesis: str, reference: str) -> float:
    """Return the word error rate of *hypothesis* measured against *reference*.

    Both strings are stripped, lower-cased and split on whitespace. The
    result is the word-level edit distance (insertions, deletions and
    substitutions each cost 1) divided by the number of reference words.

    With an empty reference the result is ``0.0`` when the hypothesis is
    empty as well, and ``float("inf")`` otherwise.
    """
    ref_tokens = reference.strip().lower().split()
    hyp_tokens = hypothesis.strip().lower().split()

    if not ref_tokens:
        return float("inf") if hyp_tokens else 0.0

    # Only two rows of the edit-distance table are kept at a time.
    # previous[j] is the distance between the reference words consumed so
    # far and the first j hypothesis words.
    previous = list(range(len(hyp_tokens) + 1))
    for row, ref_word in enumerate(ref_tokens, start=1):
        # Column 0: deleting all `row` reference words seen so far.
        current = [row]
        for col, hyp_word in enumerate(hyp_tokens, start=1):
            if ref_word == hyp_word:
                cost = previous[col - 1]
            else:
                cost = 1 + min(
                    previous[col],  # deletion
                    current[col - 1],  # insertion
                    previous[col - 1],  # substitution
                )
            current.append(cost)
        previous = current

    return previous[-1] / len(ref_tokens)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: transcribe one fixed audio file with the default
    # model and print its word error rate against a stored reference
    # transcript.
    model = load_model()
    audio_path = "test_data/30s/out000.wav"
    transcript_path = "test_transcripts_before/30s/out000.txt"

    # transcribe() also returns the elapsed time; this script does not
    # report it.
    hypothesis, elapsed_time = transcribe(model, audio_path)

    # Decode explicitly as UTF-8 so the reference text does not depend on
    # the platform's locale default encoding.
    with open(transcript_path, "r", encoding="utf-8") as f:
        reference = f.read()

    wer = calculate_wer(hypothesis, reference)
    print(f"WER: {wer:.4f}")
|