[ADD] word_stream_callback to receive each word as soon as it is ready, so it can be streamed to users or passed to another process sooner.

This commit is contained in:
Erfan Tarighi 2024-08-19 18:32:31 +02:00
parent ba3f3cd54b
commit 9cd08df3ed
2 changed files with 10 additions and 2 deletions

View File

@ -2,7 +2,7 @@ import itertools
import subprocess import subprocess
import warnings import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import TYPE_CHECKING, List from typing import TYPE_CHECKING, List, Optional, Callable
import numba import numba
import numpy as np import numpy as np
@ -284,6 +284,7 @@ def add_word_timestamps(
prepend_punctuations: str = "\"'“¿([{-", prepend_punctuations: str = "\"'“¿([{-",
append_punctuations: str = "\"'.。,!?::”)]}、", append_punctuations: str = "\"'.。,!?::”)]}、",
last_speech_timestamp: float, last_speech_timestamp: float,
word_stream_callback: Optional[Callable] = None,
**kwargs, **kwargs,
): ):
if len(segments) == 0: if len(segments) == 0:
@ -327,6 +328,8 @@ def add_word_timestamps(
timing = alignment[word_index] timing = alignment[word_index]
if timing.word: if timing.word:
if word_stream_callback is not None:
word_stream_callback(timing)
words.append( words.append(
dict( dict(
word=timing.word, word=timing.word,

View File

@ -2,7 +2,7 @@ import argparse
import os import os
import traceback import traceback
import warnings import warnings
from typing import TYPE_CHECKING, List, Optional, Tuple, Union from typing import TYPE_CHECKING, List, Optional, Tuple, Union, Callable
import numpy as np import numpy as np
import torch import torch
@ -40,6 +40,7 @@ def transcribe(
audio: Union[str, np.ndarray, torch.Tensor], audio: Union[str, np.ndarray, torch.Tensor],
*, *,
verbose: Optional[bool] = None, verbose: Optional[bool] = None,
word_stream_callback: Optional[Callable] = None,
temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0), temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
compression_ratio_threshold: Optional[float] = 2.4, compression_ratio_threshold: Optional[float] = 2.4,
logprob_threshold: Optional[float] = -1.0, logprob_threshold: Optional[float] = -1.0,
@ -68,6 +69,9 @@ def transcribe(
Whether to display the text being decoded to the console. If True, displays all the details, Whether to display the text being decoded to the console. If True, displays all the details,
If False, displays minimal details. If None, does not display anything If False, displays minimal details. If None, does not display anything
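word_stream_callback: Optional[Callable]
If given, a function called with the timing entry of each aligned word that has non-empty text, so that words can be consumed as a stream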
temperature: Union[float, Tuple[float, ...]] temperature: Union[float, Tuple[float, ...]]
Temperature for sampling. It can be a tuple of temperatures, which will be successively used Temperature for sampling. It can be a tuple of temperatures, which will be successively used
upon failures according to either `compression_ratio_threshold` or `logprob_threshold`. upon failures according to either `compression_ratio_threshold` or `logprob_threshold`.
@ -392,6 +396,7 @@ def transcribe(
prepend_punctuations=prepend_punctuations, prepend_punctuations=prepend_punctuations,
append_punctuations=append_punctuations, append_punctuations=append_punctuations,
last_speech_timestamp=last_speech_timestamp, last_speech_timestamp=last_speech_timestamp,
word_stream_callback=word_stream_callback
) )
if not single_timestamp_ending: if not single_timestamp_ending: