From e334ff141d5444fbf6904edaaf408e5b0b416fe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20BOYER?= Date: Fri, 5 May 2023 02:02:36 +0200 Subject: [PATCH] Avoid computing higher temperatures on no_speech segments (#1279) * Avoid computing higher temperatures on no_speech In decode_with_fallback, we compute higher temperatures in the case where compression_ratio is too high or avg_logprob is too low. But as the computation of no_speech_prob doesn't depend on sampling, we can avoid computing higher temperatures if we detect in the first one that the no_speech condition is fulfilled * Update transcribe.py --------- Co-authored-by: Jong Wook Kim --- whisper/transcribe.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/whisper/transcribe.py b/whisper/transcribe.py index 84feb12..cba59ec 100644 --- a/whisper/transcribe.py +++ b/whisper/transcribe.py @@ -174,7 +174,11 @@ def transcribe( and decode_result.avg_logprob < logprob_threshold ): needs_fallback = True # average log probability is too low - + if ( + no_speech_threshold is not None + and decode_result.no_speech_prob > no_speech_threshold + ): + needs_fallback = False # silence if not needs_fallback: break