mirror of
https://github.com/openai/whisper.git
synced 2025-11-24 06:26:03 +00:00
Merge abf6778935308f636c7d5c6bc2861f67cc699d53 into c0d2f624c09dc18e709e37c2ad90c039a4eb72a2
This commit is contained in:
commit
085ed74035
@ -2,7 +2,7 @@ import argparse
|
|||||||
import os
|
import os
|
||||||
import traceback
|
import traceback
|
||||||
import warnings
|
import warnings
|
||||||
from typing import TYPE_CHECKING, List, Optional, Tuple, Union
|
from typing import TYPE_CHECKING, List, Optional, Tuple, Union, Callable
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
@ -52,6 +52,7 @@ def transcribe(
|
|||||||
append_punctuations: str = "\"'.。,,!!??::”)]}、",
|
append_punctuations: str = "\"'.。,,!!??::”)]}、",
|
||||||
clip_timestamps: Union[str, List[float]] = "0",
|
clip_timestamps: Union[str, List[float]] = "0",
|
||||||
hallucination_silence_threshold: Optional[float] = None,
|
hallucination_silence_threshold: Optional[float] = None,
|
||||||
|
callback: Optional[Callable[[int, int, float], None]] = None,
|
||||||
**decode_options,
|
**decode_options,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
@ -119,6 +120,10 @@ def transcribe(
|
|||||||
When word_timestamps is True, skip silent periods longer than this threshold (in seconds)
|
When word_timestamps is True, skip silent periods longer than this threshold (in seconds)
|
||||||
when a possible hallucination is detected
|
when a possible hallucination is detected
|
||||||
|
|
||||||
|
callback: Optional[Callable[[int, int, float], None]] = None,
|
||||||
|
After each step in the transcription process, call the callback function with
|
||||||
|
the arguments: current position, total frames, and estimated time to finish in seconds
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
A dictionary containing the resulting text ("text") and segment-level details ("segments"), and
|
A dictionary containing the resulting text ("text") and segment-level details ("segments"), and
|
||||||
@ -504,8 +509,17 @@ def transcribe(
|
|||||||
# do not feed the prompt tokens if a high temperature was used
|
# do not feed the prompt tokens if a high temperature was used
|
||||||
prompt_reset_since = len(all_tokens)
|
prompt_reset_since = len(all_tokens)
|
||||||
|
|
||||||
|
total_position = min(content_frames, seek)
|
||||||
|
increase = total_position - previous_seek
|
||||||
|
|
||||||
|
if callback is not None:
|
||||||
|
rate = pbar.format_dict["rate"]
|
||||||
|
remaining = (pbar.total - pbar.n) / rate if rate and pbar.total else 0
|
||||||
|
|
||||||
|
callback(total_position, content_frames, remaining)
|
||||||
|
|
||||||
# update progress bar
|
# update progress bar
|
||||||
pbar.update(min(content_frames, seek) - previous_seek)
|
pbar.update(increase)
|
||||||
|
|
||||||
return dict(
|
return dict(
|
||||||
text=tokenizer.decode(all_tokens[len(initial_prompt_tokens) :]),
|
text=tokenizer.decode(all_tokens[len(initial_prompt_tokens) :]),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user