diff --git a/whisper/transcribe.py b/whisper/transcribe.py
index 1c075a2..70d70ae 100644
--- a/whisper/transcribe.py
+++ b/whisper/transcribe.py
@@ -228,7 +228,8 @@ def transcribe(
     prompt_reset_since = 0
 
     if initial_prompt is not None:
-        initial_prompt_tokens = tokenizer.encode(" " + initial_prompt.strip())
+        space = "" if language in {"zh", "ja", "th", "lo", "my", "yue"} else " "
+        initial_prompt_tokens = tokenizer.encode(space + initial_prompt.strip())
         all_tokens.extend(initial_prompt_tokens)
     else:
         initial_prompt_tokens = []