mirror of
https://github.com/openai/whisper.git
synced 2025-11-24 06:26:03 +00:00
Merge a0936816d5624f975a039ffeb1ca25d473e26d5b into c0d2f624c09dc18e709e37c2ad90c039a4eb72a2
This commit is contained in:
commit
b00382ac7b
@ -112,7 +112,13 @@ class DecodingOptions:
|
|||||||
|
|
||||||
# implementation details
|
# implementation details
|
||||||
fp16: bool = True # use fp16 for most of the calculation
|
fp16: bool = True # use fp16 for most of the calculation
|
||||||
|
bf16: bool = False # use bf16 for most of the calculation
|
||||||
|
|
||||||
|
def __post_init__(self):
|
||||||
|
if self.fp16 and self.bf16:
|
||||||
|
raise ValueError("Both fp16 and bf16 cannot be True at the same time")
|
||||||
|
if self.bf16:
|
||||||
|
object.__setattr__(self, "fp16", False)
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class DecodingResult:
|
class DecodingResult:
|
||||||
@ -655,7 +661,9 @@ class DecodingTask:
|
|||||||
audio_features = self.model.encoder(mel)
|
audio_features = self.model.encoder(mel)
|
||||||
|
|
||||||
if audio_features.dtype != (
|
if audio_features.dtype != (
|
||||||
torch.float16 if self.options.fp16 else torch.float32
|
torch.float16 if self.options.fp16 else
|
||||||
|
torch.bfloat16 if self.options.bf16 else
|
||||||
|
torch.float32
|
||||||
):
|
):
|
||||||
return TypeError(
|
return TypeError(
|
||||||
f"audio_features has an incorrect dtype: {audio_features.dtype}"
|
f"audio_features has an incorrect dtype: {audio_features.dtype}"
|
||||||
|
|||||||
@ -132,7 +132,7 @@ def transcribe(
|
|||||||
warnings.warn("FP16 is not supported on CPU; using FP32 instead")
|
warnings.warn("FP16 is not supported on CPU; using FP32 instead")
|
||||||
dtype = torch.float32
|
dtype = torch.float32
|
||||||
|
|
||||||
if dtype == torch.float32:
|
if dtype == torch.float32 or dtype == torch.bfloat16:
|
||||||
decode_options["fp16"] = False
|
decode_options["fp16"] = False
|
||||||
|
|
||||||
# Pad 30-seconds of silence to the input audio, for slicing
|
# Pad 30-seconds of silence to the input audio, for slicing
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user