mirror of
https://github.com/openai/whisper.git
synced 2025-11-24 06:26:03 +00:00
58 lines
1.9 KiB
Python
58 lines
1.9 KiB
Python
# IMPORTANT: The lines from here down to "# end of dev import" put the local whisper checkout on sys.path so that it is imported as the package. Delete them when whisper is installed normally.
|
|
import sys
|
|
from pathlib import Path
|
|
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
|
# end of dev import
|
|
import whisper
|
|
|
|
import colorsys
|
|
from typing import List
|
|
from whisper.tokenizer import get_tokenizer
|
|
from colorama import init, Style
|
|
|
|
|
|
print('Loading model')
model = whisper.load_model("large")

# Read the sample file, then pad or trim the waveform to fit 30 seconds.
print('Loading audio')
audio = whisper.pad_or_trim(whisper.load_audio("samples/your_audio.wav"))

# Build the log-Mel spectrogram and move it to the same device as the model.
mel = whisper.log_mel_spectrogram(audio)
mel = mel.to(model.device)
|
|
|
|
|
|
# Default language code; kept as-is unless detection below overrides it.
language = "en"
# Set to True to let the model pick the spoken language from the spectrogram.
detect_lang = False

if detect_lang:
    print('Detecting language')
    _, probs = model.detect_language(mel)
    # Keep the language with the highest probability and report it.
    language = max(probs, key=probs.get)
    print(f"Detected language: {language}")
|
|
|
|
|
|
# Decode the audio.
print('Decoding audio')
# Pass the language chosen earlier in this script. With the default
# language=None, Whisper runs its own language detection during decoding, so
# the configured/detected `language` would otherwise be ignored.
options = whisper.DecodingOptions(language=language)
result = whisper.decode(model, mel, options)
|
|
|
|
|
|
def print_colored_text(tokens: List[int], token_probs: List[float], tokenizer):
    """Print the decoded tokens on one line, each tinted by its probability.

    Every token id is decoded on its own with ``tokenizer.decode`` and paired
    with the probability at the same position. The probability is mapped to a
    hue of ``prob * (1/3)`` at full saturation and value, so 0 renders red and
    1 renders green. Tokens are written without separators and a newline is
    printed at the end.

    Args:
        tokens: Token ids to decode and print.
        token_probs: One probability per token, in the same order.
        tokenizer: Object whose ``decode`` method accepts a list of token ids.
    """
    init(autoreset=True)  # Initialize colorama

    pieces = [tokenizer.decode([token_id]) for token_id in tokens]

    for piece, confidence in zip(pieces, token_probs):
        # Interpolate between red and green in the HSV color space.
        hue = confidence * (1/3)
        red, green, blue = (int(channel * 255) for channel in colorsys.hsv_to_rgb(hue, 1, 1))

        # 24-bit ANSI foreground color escape sequence.
        ansi_prefix = f"\033[38;2;{red};{green};{blue}m"
        print(f"{ansi_prefix}{Style.BRIGHT}{piece}{Style.RESET_ALL}", end="")

    print()
|
|
|
|
|
|
# Build a tokenizer that matches the model, language and decoding task.
tokenizer = get_tokenizer(
    multilingual=model.is_multilingual,
    language=language,
    task=options.task,
)

# Print the text with fancy confidence colors.
print_colored_text(result.tokens, result.token_probs, tokenizer)
|