From c85eaaae293366fb1ac6a4e1c06cb374e4c341de Mon Sep 17 00:00:00 2001 From: hanacchi Date: Fri, 23 Sep 2022 05:10:55 +0200 Subject: [PATCH] Use UTF-8 encoding to save the txt and vtt files (#37) Explicitly set the text encoding to UTF-8 in order to avoid UnicodeEncodeErrors Co-authored-by: Jong Wook Kim --- whisper/transcribe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/whisper/transcribe.py b/whisper/transcribe.py index a795f73..0ee42a8 100644 --- a/whisper/transcribe.py +++ b/whisper/transcribe.py @@ -289,11 +289,11 @@ def cli(): audio_basename = os.path.basename(audio_path) # save TXT - with open(os.path.join(output_dir, audio_basename + ".txt"), "w") as txt: + with open(os.path.join(output_dir, audio_basename + ".txt"), "w", encoding="utf-8") as txt: print(result["text"], file=txt) # save VTT - with open(os.path.join(output_dir, audio_basename + ".vtt"), "w") as vtt: + with open(os.path.join(output_dir, audio_basename + ".vtt"), "w", encoding="utf-8") as vtt: write_vtt(result["segments"], file=vtt)