From bd9d47a977f1bc0ea471ba48b95da22bcd918b0a Mon Sep 17 00:00:00 2001 From: Kathy Reid Date: Wed, 8 Oct 2025 22:43:54 +1100 Subject: [PATCH] Update tokenizer.py to fix erroneous Javanese language code The ISO 639-1 code for Javanese is `jv`, not `jw` as given here. It should be listed as `jv`. --- whisper/tokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whisper/tokenizer.py b/whisper/tokenizer.py index 2af8375..4bc0984 100644 --- a/whisper/tokenizer.py +++ b/whisper/tokenizer.py @@ -105,7 +105,7 @@ LANGUAGES = { "ln": "lingala", "ha": "hausa", "ba": "bashkir", - "jw": "javanese", + "jv": "javanese", "su": "sundanese", "yue": "cantonese", }