mirror of
https://github.com/openai/whisper.git
synced 2025-11-23 22:15:58 +00:00
Merge f38acdff61da797d394a98dbc436587fbc10c663 into c0d2f624c09dc18e709e37c2ad90c039a4eb72a2
This commit is contained in:
commit
88792db279
@ -2,7 +2,7 @@ import itertools
|
|||||||
import subprocess
|
import subprocess
|
||||||
import warnings
|
import warnings
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import TYPE_CHECKING, List
|
from typing import TYPE_CHECKING, List, Optional, Callable
|
||||||
|
|
||||||
import numba
|
import numba
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -286,6 +286,7 @@ def add_word_timestamps(
|
|||||||
prepend_punctuations: str = "\"'“¿([{-",
|
prepend_punctuations: str = "\"'“¿([{-",
|
||||||
append_punctuations: str = "\"'.。,,!!??::”)]}、",
|
append_punctuations: str = "\"'.。,,!!??::”)]}、",
|
||||||
last_speech_timestamp: float,
|
last_speech_timestamp: float,
|
||||||
|
word_stream_callback: Optional[Callable] = None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
if len(segments) == 0:
|
if len(segments) == 0:
|
||||||
@ -329,6 +330,8 @@ def add_word_timestamps(
|
|||||||
timing = alignment[word_index]
|
timing = alignment[word_index]
|
||||||
|
|
||||||
if timing.word:
|
if timing.word:
|
||||||
|
if word_stream_callback is not None:
|
||||||
|
word_stream_callback(timing)
|
||||||
words.append(
|
words.append(
|
||||||
dict(
|
dict(
|
||||||
word=timing.word,
|
word=timing.word,
|
||||||
|
|||||||
@ -2,7 +2,7 @@ import argparse
|
|||||||
import os
|
import os
|
||||||
import traceback
|
import traceback
|
||||||
import warnings
|
import warnings
|
||||||
from typing import TYPE_CHECKING, List, Optional, Tuple, Union
|
from typing import TYPE_CHECKING, List, Optional, Tuple, Union, Callable
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
@ -40,6 +40,7 @@ def transcribe(
|
|||||||
audio: Union[str, np.ndarray, torch.Tensor],
|
audio: Union[str, np.ndarray, torch.Tensor],
|
||||||
*,
|
*,
|
||||||
verbose: Optional[bool] = None,
|
verbose: Optional[bool] = None,
|
||||||
|
word_stream_callback: Optional[Callable] = None,
|
||||||
temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
|
temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
|
||||||
compression_ratio_threshold: Optional[float] = 2.4,
|
compression_ratio_threshold: Optional[float] = 2.4,
|
||||||
logprob_threshold: Optional[float] = -1.0,
|
logprob_threshold: Optional[float] = -1.0,
|
||||||
@ -69,6 +70,9 @@ def transcribe(
|
|||||||
Whether to display the text being decoded to the console. If True, displays all the details,
|
Whether to display the text being decoded to the console. If True, displays all the details,
|
||||||
If False, displays minimal details. If None, does not display anything
|
If False, displays minimal details. If None, does not display anything
|
||||||
|
|
||||||
|
word_stream_callback: Callable
|
||||||
|
Optional function called with the timing entry of each non-empty word as soon as it is aligned, while later audio chunks are still being processed. If None, nothing is called.
|
||||||
|
|
||||||
temperature: Union[float, Tuple[float, ...]]
|
temperature: Union[float, Tuple[float, ...]]
|
||||||
Temperature for sampling. It can be a tuple of temperatures, which will be successively used
|
Temperature for sampling. It can be a tuple of temperatures, which will be successively used
|
||||||
upon failures according to either `compression_ratio_threshold` or `logprob_threshold`.
|
upon failures according to either `compression_ratio_threshold` or `logprob_threshold`.
|
||||||
@ -408,6 +412,7 @@ def transcribe(
|
|||||||
prepend_punctuations=prepend_punctuations,
|
prepend_punctuations=prepend_punctuations,
|
||||||
append_punctuations=append_punctuations,
|
append_punctuations=append_punctuations,
|
||||||
last_speech_timestamp=last_speech_timestamp,
|
last_speech_timestamp=last_speech_timestamp,
|
||||||
|
word_stream_callback=word_stream_callback
|
||||||
)
|
)
|
||||||
|
|
||||||
if not single_timestamp_ending:
|
if not single_timestamp_ending:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user