This commit is contained in:
Jong Wook Kim 2023-05-05 01:40:00 -07:00 committed by GitHub
parent 248b6cb124
commit f53a161d55
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -215,6 +215,8 @@ def find_alignment(
words, word_tokens = tokenizer.split_to_word_tokens(text_tokens + [tokenizer.eot]) words, word_tokens = tokenizer.split_to_word_tokens(text_tokens + [tokenizer.eot])
word_boundaries = np.pad(np.cumsum([len(t) for t in word_tokens[:-1]]), (1, 0)) word_boundaries = np.pad(np.cumsum([len(t) for t in word_tokens[:-1]]), (1, 0))
if len(word_boundaries) <= 1:
return []
jumps = np.pad(np.diff(text_indices), (1, 0), constant_values=1).astype(bool) jumps = np.pad(np.diff(text_indices), (1, 0), constant_values=1).astype(bool)
jump_times = time_indices[jumps] / TOKENS_PER_SECOND jump_times = time_indices[jumps] / TOKENS_PER_SECOND