mirror of
https://github.com/openai/whisper.git
synced 2025-11-23 22:15:58 +00:00
feat: Add real-time streaming example with verification steps
This commit is contained in:
parent
c0d2f624c0
commit
2693ce1f98
5
whisper/examples/README.md
Normal file
5
whisper/examples/README.md
Normal file
@ -0,0 +1,5 @@
|
||||
## Real-Time Streaming
|
||||
|
||||
For live microphone transcription:
|
||||
```bash
python examples/realtime_streaming.py
```
|
||||
77
whisper/examples/realtime_streaming.py
Normal file
77
whisper/examples/realtime_streaming.py
Normal file
@ -0,0 +1,77 @@
|
||||
import queue
import subprocess
import sys

import numpy as np
import sounddevice as sd
import whisper
|
||||
|
||||
def get_audio_config():
    """Return the first audio configuration produced by one of the probes.

    The probes ``try_standard_rates``, ``try_pulseaudio`` and
    ``try_direct_hw`` are called in that order, and the first truthy result
    is returned. When every probe comes back empty, troubleshooting hints
    are printed and the process exits with status 1.
    """
    probes = (try_standard_rates, try_pulseaudio, try_direct_hw)

    # The nested generator is lazy: a later probe only runs when all the
    # earlier ones returned nothing usable.
    found = next((cfg for cfg in (probe() for probe in probes) if cfg), None)
    if found is not None:
        return found

    hints = (
        "\nFAILED TO FIND WORKING AUDIO CONFIGURATION",
        "Possible solutions:",
        "1. Run: sudo apt install alsa-utils pulseaudio",
        "2. Check mic permissions: ls -l /dev/snd/",
        "3. Try USB microphone",
    )
    for hint in hints:
        print(hint)
    sys.exit(1)
|
||||
|
||||
def try_standard_rates():
    """Try common sample rates on a fixed list of candidate devices.

    Each (device, rate) pair is validated with
    ``sd.check_input_settings`` using the same ``channels=1`` /
    ``dtype='float32'`` settings the input stream is later opened with, so
    a successful probe reflects the configuration that is actually used.

    Returns:
        ``{'device': ..., 'rate': ...}`` for the first pair that passes the
        check, or ``None`` if none of them does.
    """
    # None selects the default input device; the integer IDs are the
    # hard-coded candidates this example probes.
    for device_id in [None, 0, 1, 4, 11]:
        for rate in [16000, 44100, 48000]:
            try:
                sd.check_input_settings(
                    device=device_id,
                    channels=1,
                    dtype='float32',
                    samplerate=rate,
                )
            except Exception:
                # Any failure just means "this combination is unusable".
                # Unlike a bare ``except:``, this lets KeyboardInterrupt
                # and SystemExit propagate.
                continue
            return {'device': device_id, 'rate': rate}
    return None
|
||||
|
||||
def try_pulseaudio():
    """Force PulseAudio configuration.

    Runs ``pacmd list-sources`` to find out whether PulseAudio is usable.

    Returns:
        ``{'device': 'pulse', 'rate': 44100}`` when the command exits
        successfully, otherwise ``None`` (``pacmd`` missing, not
        executable, or exiting with a non-zero status).
    """
    try:
        subprocess.run(
            ['pacmd', 'list-sources'],
            check=True,
            # Only the exit status matters; keep the source listing off the
            # user's terminal.
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing or non-executable pacmd binary;
        # SubprocessError covers the CalledProcessError raised by check=True.
        return None
    return {'device': 'pulse', 'rate': 44100}
|
||||
|
||||
def try_direct_hw():
    """Last-resort direct hardware access.

    Returns a fixed configuration naming device ``'hw:0,0'`` at 48000 Hz.
    Nothing is probed here: the configuration is returned unverified, so
    an unusable device only surfaces when the input stream is opened.

    Returns:
        ``{'device': 'hw:0,0', 'rate': 48000}``, always.
    """
    # No try/except: building a dict literal cannot raise.
    return {'device': 'hw:0,0', 'rate': 48000}
|
||||
|
||||
# Whisper models consume 16 kHz mono float32 audio.
WHISPER_SAMPLE_RATE = 16000
# Seconds of microphone audio gathered before each transcription pass.
CHUNK_SECONDS = 5

# Load model first to fail fast if issues
MODEL = whisper.load_model("tiny")

# Get audio config
config = get_audio_config()
print(f"\nUsing audio config: {config}")

# Hands captured blocks from the audio callback to the main loop.
audio_queue = queue.Queue()


def callback(indata, frames, time, status):
    """Queue one captured block for the main loop.

    This is invoked by the input stream for every block of audio, so it
    must return quickly: transcription happens in the main loop, never
    here.

    Args:
        indata: Captured samples; column 0 is the single recorded channel.
        frames: Number of frames in ``indata`` (unused).
        time: Stream timing information (unused).
        status: Flags reported by the stream; printed to stderr when set.
    """
    if status:
        print(status, file=sys.stderr)
    # astype() returns a fresh array, so the queued block stays valid after
    # the stream reuses its input buffer.
    audio_queue.put(indata[:, 0].astype(np.float32))


def _resample(audio, source_rate, target_rate=WHISPER_SAMPLE_RATE):
    """Linearly resample a 1-D float array from source_rate to target_rate.

    Plain linear interpolation without an anti-aliasing filter: adequate
    for an example, not a substitute for a proper resampler.
    """
    if source_rate == target_rate or audio.size == 0:
        return audio
    target_len = int(round(audio.size * target_rate / source_rate))
    source_times = np.arange(audio.size) / source_rate
    target_times = np.arange(target_len) / target_rate
    return np.interp(target_times, source_times, audio).astype(np.float32)


print("\nStarting transcription... Speak now!")
samples_per_chunk = int(config['rate'] * CHUNK_SECONDS)
pending = []
pending_samples = 0
try:
    with sd.InputStream(
        device=config['device'],
        samplerate=config['rate'],
        channels=1,
        blocksize=2048,
        dtype='float32',
        callback=callback,
    ):
        while True:
            # Blocks until the callback delivers the next block.
            block = audio_queue.get()
            pending.append(block)
            pending_samples += block.size
            if pending_samples < samples_per_chunk:
                continue

            # A single 2048-sample block is far too short to transcribe;
            # work on several seconds at once, at the rate Whisper expects.
            audio = _resample(np.concatenate(pending), config['rate'])
            pending = []
            pending_samples = 0

            result = MODEL.transcribe(audio)
            if result["text"].strip():
                print(result["text"], end=" ", flush=True)
except KeyboardInterrupt:
    # Ctrl-C is the expected way to stop; exit without a traceback.
    print("\nStopped.")
|
||||
Loading…
x
Reference in New Issue
Block a user