Use UTF-8 encoding to save the txt and vtt files (#37)

Explicitly set the text encoding to UTF-8 when opening the .txt and .vtt output files, in order to avoid UnicodeEncodeErrors.

Co-authored-by: Jong Wook Kim <jongwook@nyu.edu>
This commit is contained in:
hanacchi 2022-09-23 05:10:55 +02:00 committed by GitHub
parent 759e8d47a8
commit c85eaaae29
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -289,11 +289,11 @@ def cli():
audio_basename = os.path.basename(audio_path)
# save TXT
with open(os.path.join(output_dir, audio_basename + ".txt"), "w") as txt:
with open(os.path.join(output_dir, audio_basename + ".txt"), "w", encoding="utf-8") as txt:
print(result["text"], file=txt)
# save VTT
with open(os.path.join(output_dir, audio_basename + ".vtt"), "w") as vtt:
with open(os.path.join(output_dir, audio_basename + ".vtt"), "w", encoding="utf-8") as vtt:
write_vtt(result["segments"], file=vtt)