From 02b74308fff49aa0d5dd603faefa76d2edd8d56b Mon Sep 17 00:00:00 2001
From: Tom Stuart
Date: Mon, 3 Oct 2022 22:51:07 +0100
Subject: [PATCH] Fix timestamps and strip extraneous whitespace in WebVTT output (#219)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Use two-digit hours in WebVTT timestamps

Per the WebVTT specification [0]:

> A WebVTT timestamp consists of the following components, in the given
> order:
>
> 1. Optionally (required if hours is non-zero):
>    1. Two or more ASCII digits, representing the hours as a base ten
>       integer.
>    2. A U+003A COLON character (:)

YouTube won’t accept timestamps containing single-digit hours.

[0] https://www.w3.org/TR/webvtt1/#webvtt-timestamp

* Strip segment text in WebVTT output

We already do this for plain text and SubRip output, so we should do it
for WebVTT too.
---
 whisper/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/whisper/utils.py b/whisper/utils.py
index 1e35471..87c91f2 100644
--- a/whisper/utils.py
+++ b/whisper/utils.py
@@ -40,7 +40,7 @@ def format_timestamp(seconds: float, always_include_hours: bool = False, decimal
     seconds = milliseconds // 1_000
     milliseconds -= seconds * 1_000
 
-    hours_marker = f"{hours}:" if always_include_hours or hours > 0 else ""
+    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
     return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}"
 
 
@@ -54,7 +54,7 @@ def write_vtt(transcript: Iterator[dict], file: TextIO):
     for segment in transcript:
         print(
             f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
-            f"{segment['text'].replace('-->', '->')}\n",
+            f"{segment['text'].strip().replace('-->', '->')}\n",
             file=file,
             flush=True,
         )