mirror of
https://github.com/openai/whisper.git
synced 2025-11-23 22:15:58 +00:00
word timing tweaks (#1559)
* word timing tweaks
* comment on eot
* clearer comments
This commit is contained in:
parent
b91c907694
commit
e8622f9afc
@ -214,6 +214,13 @@ def find_alignment(
|
|||||||
text_indices, time_indices = dtw(-matrix)
|
text_indices, time_indices = dtw(-matrix)
|
||||||
|
|
||||||
words, word_tokens = tokenizer.split_to_word_tokens(text_tokens + [tokenizer.eot])
|
words, word_tokens = tokenizer.split_to_word_tokens(text_tokens + [tokenizer.eot])
|
||||||
|
if len(word_tokens) <= 1:
|
||||||
|
# return on eot only
|
||||||
|
# >>> np.pad([], (1, 0))
|
||||||
|
# array([0.])
|
||||||
|
# This results in crashes when we lookup jump_times with float, like
|
||||||
|
# IndexError: arrays used as indices must be of integer (or boolean) type
|
||||||
|
return []
|
||||||
word_boundaries = np.pad(np.cumsum([len(t) for t in word_tokens[:-1]]), (1, 0))
|
word_boundaries = np.pad(np.cumsum([len(t) for t in word_tokens[:-1]]), (1, 0))
|
||||||
|
|
||||||
jumps = np.pad(np.diff(text_indices), (1, 0), constant_values=1).astype(bool)
|
jumps = np.pad(np.diff(text_indices), (1, 0), constant_values=1).astype(bool)
|
||||||
@ -297,8 +304,6 @@ def add_word_timestamps(
|
|||||||
# hack: truncate long words at sentence boundaries.
|
# hack: truncate long words at sentence boundaries.
|
||||||
# a better segmentation algorithm based on VAD should be able to replace this.
|
# a better segmentation algorithm based on VAD should be able to replace this.
|
||||||
if len(word_durations) > 0:
|
if len(word_durations) > 0:
|
||||||
median_duration = np.median(word_durations)
|
|
||||||
max_duration = median_duration * 2
|
|
||||||
sentence_end_marks = ".。!!??"
|
sentence_end_marks = ".。!!??"
|
||||||
# ensure words at sentence boundaries are not longer than twice the median word duration.
|
# ensure words at sentence boundaries are not longer than twice the median word duration.
|
||||||
for i in range(1, len(alignment)):
|
for i in range(1, len(alignment)):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user