formatting fix

2025-11-23 22:15:58 +00:00 · 2023-11-06 10:09:38 -08:00 · 2023-11-06 10:09:38 -08:00 · 0efe52add2
commit 0efe52add2
parent 4aa8014589
3 changed files with 11 additions and 5 deletions
--- a/whisper/__init__.py
+++ b/whisper/__init__.py
@@ -42,8 +42,8 @@ _ALIGNMENT_HEADS = {
    "medium": b"ABzY8B0Jh+0{>%R7}kK1fFL7w6%<-Pf*t^=N)Qr&0RR9",
    "large-v1": b"ABzY8r9j$a0{>%R7#4sLmoOs{s)o3~84-RPdcFk!JR<kSfC2yj",
    "large-v2": b"ABzY8zd+h!0{>%R7=D0pU<_bnWW*tkYAhobTNnu$jnkEkXqp)j;w1Tzk)UH3X%SZd&fFZ2fC2yj",
-    "large-v3": b'ABzY8gWO1E0{>%R7(9S+Kn!D~%ngiGaR?*L!iJG9p-nab0JQ=-{D1-g00',
-    "large": b'ABzY8gWO1E0{>%R7(9S+Kn!D~%ngiGaR?*L!iJG9p-nab0JQ=-{D1-g00',
+    "large-v3": b"ABzY8gWO1E0{>%R7(9S+Kn!D~%ngiGaR?*L!iJG9p-nab0JQ=-{D1-g00",
+    "large": b"ABzY8gWO1E0{>%R7(9S+Kn!D~%ngiGaR?*L!iJG9p-nab0JQ=-{D1-g00",
 }


--- a/whisper/decoding.py
+++ b/whisper/decoding.py
@@ -32,7 +32,9 @@ def detect_language(
        list of dictionaries containing the probability distribution over all languages.
    """
    if tokenizer is None:
-        tokenizer = get_tokenizer(model.is_multilingual, num_languages=model.num_languages)
+        tokenizer = get_tokenizer(
+            model.is_multilingual, num_languages=model.num_languages
+        )
    if (
        tokenizer.language is None
        or tokenizer.language_token not in tokenizer.sot_sequence
--- a/whisper/tokenizer.py
+++ b/whisper/tokenizer.py
@@ -251,7 +251,9 @@ class Tokenizer:
        keeping basic punctuations like commas, periods, question marks, exclamation points, etc.
        """
        symbols = list('"#()*+/:;<=>@[\\]^_`{|}~「」『』')
-        symbols += "<< >> <<< >>> -- --- -( -[ (' (\" (( )) ((( ))) [[ ]] {{ }} ♪♪ ♪♪♪".split()
+        symbols += (
+            "<< >> <<< >>> -- --- -( -[ (' (\" (( )) ((( ))) [[ ]] {{ }} ♪♪ ♪♪♪".split()
+        )

        # symbols that may be a single token or multiple tokens depending on the tokenizer.
        # In case they're multiple tokens, suppress the first token, which is safe because:
@@ -388,4 +390,6 @@ def get_tokenizer(

    encoding = get_encoding(name=encoding_name, num_languages=num_languages)

-    return Tokenizer(encoding=encoding, num_languages=num_languages, language=language, task=task)
+    return Tokenizer(
+        encoding=encoding, num_languages=num_languages, language=language, task=task
+    )