mirror of
https://github.com/openai/whisper.git
synced 2025-11-26 15:35:57 +00:00
Update transcribe.py
Typos
This commit is contained in:
parent
bb8c47519d
commit
80ddd07c28
@ -42,7 +42,7 @@ def transcribe(
|
|||||||
verbose: Optional[bool] = None,
|
verbose: Optional[bool] = None,
|
||||||
temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
|
temperature: Union[float, Tuple[float, ...]] = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
|
||||||
compression_ratio_threshold: Optional[float] = 2.4,
|
compression_ratio_threshold: Optional[float] = 2.4,
|
||||||
compression_ratio_halucination_threshold: Optional[float] = 3,
|
compression_ratio_hallucination_threshold: Optional[float] = 3,
|
||||||
logprob_threshold: Optional[float] = -1.0,
|
logprob_threshold: Optional[float] = -1.0,
|
||||||
no_speech_threshold: Optional[float] = 0.6,
|
no_speech_threshold: Optional[float] = 0.6,
|
||||||
condition_on_previous_text: bool = True,
|
condition_on_previous_text: bool = True,
|
||||||
@ -76,7 +76,7 @@ def transcribe(
|
|||||||
compression_ratio_threshold: float
|
compression_ratio_threshold: float
|
||||||
If the gzip compression ratio is above this value, treat as failed
|
If the gzip compression ratio is above this value, treat as failed
|
||||||
|
|
||||||
compression_ratio_halcination_threshold: float
|
compression_ratio_hallucination_threshold: float
|
||||||
If the gzip compression ratio is above this value after all attempts to decode, treat as a halucination and skip
|
If the gzip compression ratio is above this value after all attempts to decode, treat as a halucination and skip
|
||||||
|
|
||||||
logprob_threshold: float
|
logprob_threshold: float
|
||||||
@ -106,11 +106,6 @@ def transcribe(
|
|||||||
"prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
|
"prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
|
||||||
to make it more likely to predict those word correctly.
|
to make it more likely to predict those word correctly.
|
||||||
|
|
||||||
carry_initial_prompt: bool
|
|
||||||
If carry_initial_prompt is True, `initial_prompt` is prepended to the prompt of each internal
|
|
||||||
`decode()` call. If there is not enough context space at the start of the prompt, it is
|
|
||||||
left-sliced to make space.
|
|
||||||
|
|
||||||
decode_options: dict
|
decode_options: dict
|
||||||
Keyword arguments to construct `DecodingOptions` instances
|
Keyword arguments to construct `DecodingOptions` instances
|
||||||
|
|
||||||
@ -220,12 +215,14 @@ def transcribe(
|
|||||||
):
|
):
|
||||||
needs_fallback = False # silence
|
needs_fallback = False # silence
|
||||||
if (
|
if (
|
||||||
compression_ratio_halucination_threshold is not None
|
compression_ratio_hallucination_threshold is not None
|
||||||
and decode_result.compression_ratio > compression_ratio_halucination_threshold
|
and decode_result.compression_ratio > compression_ratio_hallucination_threshold
|
||||||
and t == temperatures[-1]
|
and t == temperatures[-1]
|
||||||
):
|
):
|
||||||
# Discard the segment
|
# Discard the segment
|
||||||
continue # Skip to the next segment
|
continue # Skip to the next segment
|
||||||
|
|
||||||
|
|
||||||
if not needs_fallback:
|
if not needs_fallback:
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -243,11 +240,9 @@ def transcribe(
|
|||||||
all_segments = []
|
all_segments = []
|
||||||
prompt_reset_since = 0
|
prompt_reset_since = 0
|
||||||
|
|
||||||
remaining_prompt_length = model.dims.n_text_ctx // 2 - 1
|
|
||||||
if initial_prompt is not None:
|
if initial_prompt is not None:
|
||||||
initial_prompt_tokens = tokenizer.encode(" " + initial_prompt.strip())
|
initial_prompt_tokens = tokenizer.encode(" " + initial_prompt.strip())
|
||||||
all_tokens.extend(initial_prompt_tokens)
|
all_tokens.extend(initial_prompt_tokens)
|
||||||
remaining_prompt_length -= len(initial_prompt_tokens)
|
|
||||||
else:
|
else:
|
||||||
initial_prompt_tokens = []
|
initial_prompt_tokens = []
|
||||||
|
|
||||||
@ -293,13 +288,7 @@ def transcribe(
|
|||||||
segment_duration = segment_size * HOP_LENGTH / SAMPLE_RATE
|
segment_duration = segment_size * HOP_LENGTH / SAMPLE_RATE
|
||||||
mel_segment = pad_or_trim(mel_segment, N_FRAMES).to(model.device).to(dtype)
|
mel_segment = pad_or_trim(mel_segment, N_FRAMES).to(model.device).to(dtype)
|
||||||
|
|
||||||
if carry_initial_prompt:
|
decode_options["prompt"] = all_tokens[prompt_reset_since:]
|
||||||
nignored = max(len(initial_prompt_tokens), prompt_reset_since)
|
|
||||||
remaining_prompt = all_tokens[nignored:][-remaining_prompt_length:]
|
|
||||||
decode_options["prompt"] = initial_prompt_tokens + remaining_prompt
|
|
||||||
else:
|
|
||||||
decode_options["prompt"] = all_tokens[prompt_reset_since:]
|
|
||||||
|
|
||||||
result: DecodingResult = decode_with_fallback(mel_segment)
|
result: DecodingResult = decode_with_fallback(mel_segment)
|
||||||
tokens = torch.tensor(result.tokens)
|
tokens = torch.tensor(result.tokens)
|
||||||
|
|
||||||
@ -553,8 +542,6 @@ def cli():
|
|||||||
|
|
||||||
parser.add_argument("--suppress_tokens", type=str, default="-1", help="comma-separated list of token ids to suppress during sampling; '-1' will suppress most special characters except common punctuations")
|
parser.add_argument("--suppress_tokens", type=str, default="-1", help="comma-separated list of token ids to suppress during sampling; '-1' will suppress most special characters except common punctuations")
|
||||||
parser.add_argument("--initial_prompt", type=str, default=None, help="optional text to provide as a prompt for the first window.")
|
parser.add_argument("--initial_prompt", type=str, default=None, help="optional text to provide as a prompt for the first window.")
|
||||||
parser.add_argument("--carry_initial_prompt", type=str2bool, default=False, help="if True, prepend initial_prompt to every internal decode() call. May reduce the effectiveness of condition_on_previous_text")
|
|
||||||
|
|
||||||
parser.add_argument("--condition_on_previous_text", type=str2bool, default=True, help="if True, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop")
|
parser.add_argument("--condition_on_previous_text", type=str2bool, default=True, help="if True, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop")
|
||||||
parser.add_argument("--fp16", type=str2bool, default=True, help="whether to perform inference in fp16; True by default")
|
parser.add_argument("--fp16", type=str2bool, default=True, help="whether to perform inference in fp16; True by default")
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user