Merge f38acdff61da797d394a98dbc436587fbc10c663 into c0d2f624c09dc18e709e37c2ad90c039a4eb72a2

This commit is contained in:
Erfan Tarighi 2025-10-02 23:41:37 +02:00 committed by GitHub
commit 88792db279
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 10 additions and 2 deletions

View File

@ -2,7 +2,7 @@ import itertools
import subprocess import subprocess
import warnings import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import TYPE_CHECKING, List from typing import TYPE_CHECKING, List, Optional, Callable
import numba import numba
import numpy as np import numpy as np
@ -286,6 +286,7 @@ def add_word_timestamps(
prepend_punctuations: str = "\"'“¿([{-", prepend_punctuations: str = "\"'“¿([{-",
append_punctuations: str = "\"'.。,!?::”)]}、", append_punctuations: str = "\"'.。,!?::”)]}、",
last_speech_timestamp: float, last_speech_timestamp: float,
word_stream_callback: Optional[Callable] = None,
**kwargs, **kwargs,
): ):
if len(segments) == 0: if len(segments) == 0:
@ -329,6 +330,8 @@ def add_word_timestamps(
timing = alignment[word_index] timing = alignment[word_index]
if timing.word: if timing.word:
if word_stream_callback is not None:
word_stream_callback(timing)
words.append( words.append(
dict( dict(
word=timing.word, word=timing.word,

View File

@ -2,7 +2,7 @@ import argparse
import os import os
import traceback import traceback
import warnings import warnings
from typing import TYPE_CHECKING, List, Optional, Tuple, Union from typing import TYPE_CHECKING, List, Optional, Tuple, Union, Callable
import numpy as np import numpy as np
import torch import torch
@ -40,6 +40,7 @@ def transcribe(
audio: Union[str, np.ndarray, torch.Tensor], audio: Union[str, np.ndarray, torch.Tensor],
*, *,
verbose: Optional[bool] = None, verbose: Optional[bool] = None,
word_stream_callback: Optional[Callable] = None,
temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0), temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
compression_ratio_threshold: Optional[float] = 2.4, compression_ratio_threshold: Optional[float] = 2.4,
logprob_threshold: Optional[float] = -1.0, logprob_threshold: Optional[float] = -1.0,
@ -69,6 +70,9 @@ def transcribe(
Whether to display the text being decoded to the console. If True, displays all the details, Whether to display the text being decoded to the console. If True, displays all the details,
If False, displays minimal details. If None, does not display anything If False, displays minimal details. If None, does not display anything
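word_stream_callback: Optional[Callable]
Optional function called with the timing entry of each non-empty word as soon as that word has been aligned, while the remaining audio chunks are still being processed. If None (the default), no callback is invoked.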
temperature: Union[float, Tuple[float, ...]] temperature: Union[float, Tuple[float, ...]]
Temperature for sampling. It can be a tuple of temperatures, which will be successively used Temperature for sampling. It can be a tuple of temperatures, which will be successively used
upon failures according to either `compression_ratio_threshold` or `logprob_threshold`. upon failures according to either `compression_ratio_threshold` or `logprob_threshold`.
@ -408,6 +412,7 @@ def transcribe(
prepend_punctuations=prepend_punctuations, prepend_punctuations=prepend_punctuations,
append_punctuations=append_punctuations, append_punctuations=append_punctuations,
last_speech_timestamp=last_speech_timestamp, last_speech_timestamp=last_speech_timestamp,
word_stream_callback=word_stream_callback
) )
if not single_timestamp_ending: if not single_timestamp_ending: