Omit space prefix in initial_prompt for spaceless languages.

This commit is contained in:
Ryan Heise 2024-03-31 13:11:45 +11:00
parent ba3f3cd54b
commit 21999e1702

View File

@@ -228,7 +228,8 @@ def transcribe(
prompt_reset_since = 0 prompt_reset_since = 0
if initial_prompt is not None: if initial_prompt is not None:
initial_prompt_tokens = tokenizer.encode(" " + initial_prompt.strip()) space = "" if language in {"zh", "ja", "th", "lo", "my", "yue"} else " "
initial_prompt_tokens = tokenizer.encode(space + initial_prompt.strip())
all_tokens.extend(initial_prompt_tokens) all_tokens.extend(initial_prompt_tokens)
else: else:
initial_prompt_tokens = [] initial_prompt_tokens = []