mirror of
https://github.com/openai/whisper.git
synced 2025-11-24 06:26:03 +00:00
Merge d5328a849f4e599706e33efdbbca37cff875a649 into c0d2f624c09dc18e709e37c2ad90c039a4eb72a2
This commit is contained in:
commit
c97d835dab
@ -18,18 +18,20 @@ if TYPE_CHECKING:
|
|||||||
@torch.no_grad()
|
@torch.no_grad()
|
||||||
def detect_language(
|
def detect_language(
|
||||||
model: "Whisper", mel: Tensor, tokenizer: Tokenizer = None
|
model: "Whisper", mel: Tensor, tokenizer: Tokenizer = None
|
||||||
) -> Tuple[Tensor, List[dict]]:
|
) -> Tuple[Tensor, Union[Dict[str,float], List[Dict[str,float]]]]:
|
||||||
"""
|
"""
|
||||||
Detect the spoken language in the audio, and return them as list of strings, along with the ids
|
Detect the spoken language in the audio, and return them as a list of strings, along with the ids
|
||||||
of the most probable language tokens and the probability distribution over all language tokens.
|
of the most probable language tokens and the probability distribution over all language tokens.
|
||||||
This is performed outside the main decode loop in order to not interfere with kv-caching.
|
This is performed outside the main decode loop in order to not interfere with kv-caching.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
language_tokens : Tensor, shape = (n_audio,)
|
language_tokens : Tensor, shape = (n_audio,)
|
||||||
ids of the most probable language tokens, which appears after the startoftranscript token.
|
ids of the most probable language tokens, which appear after the startoftranscript token.
|
||||||
language_probs : List[Dict[str, float]], length = n_audio
|
language_probs : Union[Dict[str, float], List[Dict[str, float]]]
|
||||||
list of dictionaries containing the probability distribution over all languages.
|
If the input contains a single audio sample, this will be a dictionary containing the
|
||||||
|
probability distribution over all languages for that sample. If the input contains multiple
|
||||||
|
audio samples, this will be a list of such dictionaries, one for each sample (length = n_audio).
|
||||||
"""
|
"""
|
||||||
if tokenizer is None:
|
if tokenizer is None:
|
||||||
tokenizer = get_tokenizer(
|
tokenizer = get_tokenizer(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user