[ADD] word_stream_callback: receive each word as soon as it is ready, so callers can stream words to users or start downstream processing sooner.

This commit is contained in:
Erfan Tarighi 2024-08-19 18:32:31 +02:00
parent ba3f3cd54b
commit 9cd08df3ed
2 changed files with 10 additions and 2 deletions

View File

@ -2,7 +2,7 @@ import itertools
import subprocess
import warnings
from dataclasses import dataclass
from typing import TYPE_CHECKING, List
from typing import TYPE_CHECKING, List, Optional, Callable
import numba
import numpy as np
@ -284,6 +284,7 @@ def add_word_timestamps(
prepend_punctuations: str = "\"'“¿([{-",
append_punctuations: str = "\"'.。,!?::”)]}、",
last_speech_timestamp: float,
word_stream_callback: Optional[Callable] = None,
**kwargs,
):
if len(segments) == 0:
@ -327,6 +328,8 @@ def add_word_timestamps(
timing = alignment[word_index]
if timing.word:
if word_stream_callback is not None:
word_stream_callback(timing)
words.append(
dict(
word=timing.word,

View File

@ -2,7 +2,7 @@ import argparse
import os
import traceback
import warnings
from typing import TYPE_CHECKING, List, Optional, Tuple, Union
from typing import TYPE_CHECKING, List, Optional, Tuple, Union, Callable
import numpy as np
import torch
@ -40,6 +40,7 @@ def transcribe(
audio: Union[str, np.ndarray, torch.Tensor],
*,
verbose: Optional[bool] = None,
word_stream_callback: Optional[Callable] = None,
temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
compression_ratio_threshold: Optional[float] = 2.4,
logprob_threshold: Optional[float] = -1.0,
@ -68,6 +69,9 @@ def transcribe(
Whether to display the text being decoded to the console. If True, displays all the details,
If False, displays minimal details. If None, does not display anything
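word_stream_callback: Optional[Callable]
A function called with each word's timing as soon as the word is available, so that words can be
streamed while transcription is still running. If None, no callback is invoked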
temperature: Union[float, Tuple[float, ...]]
Temperature for sampling. It can be a tuple of temperatures, which will be successively used
upon failures according to either `compression_ratio_threshold` or `logprob_threshold`.
@ -392,6 +396,7 @@ def transcribe(
prepend_punctuations=prepend_punctuations,
append_punctuations=append_punctuations,
last_speech_timestamp=last_speech_timestamp,
word_stream_callback=word_stream_callback
)
if not single_timestamp_ending: