From 5dff4db81ac1d9d1b08a4904c2e9f1b560d3d932 Mon Sep 17 00:00:00 2001 From: Learpcs <34576126+Learpcs@users.noreply.github.com> Date: Thu, 26 Jun 2025 03:55:15 +0400 Subject: [PATCH 1/8] Fix: GitHub display errors for Jupyter notebooks (#2589) * Update LibriSpeech.ipynb Update LibriSpeech.ipynb * Update Multilingual_ASR.ipynb --- notebooks/LibriSpeech.ipynb | 3 ++- notebooks/Multilingual_ASR.ipynb | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/notebooks/LibriSpeech.ipynb b/notebooks/LibriSpeech.ipynb index 3d90e65..602bbe4 100644 --- a/notebooks/LibriSpeech.ipynb +++ b/notebooks/LibriSpeech.ipynb @@ -949,7 +949,8 @@ "style": "IPY_MODEL_039b53f2702c4179af7e0548018d0588", "value": " 164/164 [05:08<00:00, 1.86s/it]" } - } + }, + "state": {} } } }, diff --git a/notebooks/Multilingual_ASR.ipynb b/notebooks/Multilingual_ASR.ipynb index 2d32e0e..f19e3e0 100644 --- a/notebooks/Multilingual_ASR.ipynb +++ b/notebooks/Multilingual_ASR.ipynb @@ -4219,7 +4219,8 @@ "_view_name": "StyleView", "description_width": "" } - } + }, + "state": {} } } }, From 86899243e9fd1047a04a0e3991ef4b239c639d56 Mon Sep 17 00:00:00 2001 From: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> Date: Thu, 26 Jun 2025 02:02:54 +0200 Subject: [PATCH 2/8] Fixed triton kernel update to support latest triton versions (#2588) * Update triton kernel using _unsafe_update_src * support old triton versions * refactored changes to update triton kernel only once * Update triton_ops.py --------- Co-authored-by: Jong Wook Kim Co-authored-by: Jong Wook Kim --- whisper/triton_ops.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/whisper/triton_ops.py b/whisper/triton_ops.py index edd4564..13d417b 100644 --- a/whisper/triton_ops.py +++ b/whisper/triton_ops.py @@ -60,7 +60,7 @@ def median_kernel(filter_width: int): tl.store(y_ptr + offsets, MIDDLE_ROW_HERE, mask=mask) # noqa: F821 kernel = triton.JITFunction(kernel.fn) - kernel.src = kernel.src.replace( 
+ new_kernel = kernel.src.replace( " LOAD_ALL_ROWS_HERE", "\n".join( [ @@ -69,7 +69,8 @@ def median_kernel(filter_width: int): ] ), ) - kernel.src = kernel.src.replace( + + new_kernel = new_kernel.replace( " BUBBLESORT_HERE", "\n\n".join( [ @@ -90,7 +91,14 @@ def median_kernel(filter_width: int): ] ), ) - kernel.src = kernel.src.replace("MIDDLE_ROW_HERE", f"row{filter_width // 2}") + + new_kernel = new_kernel.replace("MIDDLE_ROW_HERE", f"row{filter_width // 2}") + + if hasattr(kernel, "_unsafe_update_src") is True: + kernel._unsafe_update_src(new_kernel) + kernel.hash = None + else: + kernel.src = new_kernel return kernel From f50c4f264e072d17da320fb7266ef55f791fcc35 Mon Sep 17 00:00:00 2001 From: "Nicholas Nadeau, Ph.D., P.Eng." <6395915+engnadeau@users.noreply.github.com> Date: Wed, 25 Jun 2025 20:03:47 -0400 Subject: [PATCH 3/8] docs: updated README to specify translation model limitation (#2547) Updated README given info from https://github.com/openai/whisper/discussions/2483 --- README.md | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 696869c..196b48f 100644 --- a/README.md +++ b/README.md @@ -77,25 +77,35 @@ Whisper's performance varies widely depending on the language. The figure below ![WER breakdown by language](https://github.com/openai/whisper/assets/266841/f4619d66-1058-4005-8f67-a9d811b77c62) - - ## Command-line usage The following command will transcribe speech in audio files, using the `turbo` model: - whisper audio.flac audio.mp3 audio.wav --model turbo +```bash +whisper audio.flac audio.mp3 audio.wav --model turbo +``` -The default setting (which selects the `turbo` model) works well for transcribing English. To transcribe an audio file containing non-English speech, you can specify the language using the `--language` option: +The default setting (which selects the `turbo` model) works well for transcribing English. 
However, **the `turbo` model is not trained for translation tasks**. If you need to **translate non-English speech into English**, use one of the **multilingual models** (`tiny`, `base`, `small`, `medium`, `large`) instead of `turbo`. - whisper japanese.wav --language Japanese +For example, to transcribe an audio file containing non-English speech, you can specify the language: -Adding `--task translate` will translate the speech into English: +```bash +whisper japanese.wav --language Japanese +``` - whisper japanese.wav --language Japanese --task translate +To **translate** speech into English, use: + +```bash +whisper japanese.wav --model medium --language Japanese --task translate +``` + +> **Note:** The `turbo` model will return the original language even if `--task translate` is specified. Use `medium` or `large` for the best translation results. Run the following to view all available options: - whisper --help +```bash +whisper --help +``` See [tokenizer.py](https://github.com/openai/whisper/blob/main/whisper/tokenizer.py) for the list of all available languages. 
From 679ae1d14167541384b4e732f80847e1c5095b19 Mon Sep 17 00:00:00 2001 From: Nathan Harmon Date: Wed, 25 Jun 2025 18:42:09 -0600 Subject: [PATCH 4/8] Fix: Ensure DTW cost tensor is on the same device as input tensor (#2561) Co-authored-by: Jong Wook Kim --- whisper/timing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whisper/timing.py b/whisper/timing.py index e563414..2340000 100644 --- a/whisper/timing.py +++ b/whisper/timing.py @@ -117,7 +117,7 @@ def dtw_cuda(x, BLOCK_SIZE=1024): x_skew = x_skew.T.contiguous() cost = torch.ones(N + M + 2, M + 2) * np.inf cost[0, 0] = 0 - cost = cost.cuda() + cost = cost.to(x.device) trace = torch.zeros_like(cost, dtype=torch.int32) dtw_kernel[(1,)]( From 1f8fc975d3f679035f55d9838158ab688e39a82e Mon Sep 17 00:00:00 2001 From: Dridi Yassin <73611344+yaslack@users.noreply.github.com> Date: Thu, 26 Jun 2025 02:54:30 +0200 Subject: [PATCH 5/8] =?UTF-8?q?Fix:=20Update=20torch.load=20to=20use=20wei?= =?UTF-8?q?ghts=5Fonly=3DTrue=20to=20prevent=20security=20w=E2=80=A6=20(#2?= =?UTF-8?q?451)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix: Update torch.load to use weights_only=True to prevent security warning * Update __init__.py * Update __init__.py --------- Co-authored-by: Jong Wook Kim --- whisper/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/whisper/__init__.py b/whisper/__init__.py index e210718..f284ec0 100644 --- a/whisper/__init__.py +++ b/whisper/__init__.py @@ -147,7 +147,8 @@ def load_model( with ( io.BytesIO(checkpoint_file) if in_memory else open(checkpoint_file, "rb") ) as fp: - checkpoint = torch.load(fp, map_location=device) + kwargs = {"weights_only": True} if torch.__version__ >= "1.13" else {} + checkpoint = torch.load(fp, map_location=device, **kwargs) del checkpoint_file dims = ModelDimensions(**checkpoint["dims"]) From 31243bad24cc746f07d4c8bfdd2d974872cb1803 Mon Sep 17 00:00:00 2001 From: Jong Wook Kim Date: 
Wed, 25 Jun 2025 18:00:48 -0700 Subject: [PATCH 6/8] Release 20250625 --- CHANGELOG.md | 21 +++++++++++++++++++++ whisper/version.py | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7152899..0876010 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,26 @@ # CHANGELOG +## [v20250625](https://github.com/openai/whisper/releases/tag/v20250625) + +* Fix: Update torch.load to use weights_only=True to prevent security warning ([#2451](https://github.com/openai/whisper/pull/2451)) +* Fix: Ensure DTW cost tensor is on the same device as input tensor ([#2561](https://github.com/openai/whisper/pull/2561)) +* docs: updated README to specify translation model limitation ([#2547](https://github.com/openai/whisper/pull/2547)) +* Fixed triton kernel update to support latest triton versions ([#2588](https://github.com/openai/whisper/pull/2588)) +* Fix: GitHub display errors for Jupyter notebooks ([#2589](https://github.com/openai/whisper/pull/2589)) +* Bump the github-actions group with 3 updates ([#2592](https://github.com/openai/whisper/pull/2592)) +* Keep GitHub Actions up to date with GitHub's Dependabot ([#2486](https://github.com/openai/whisper/pull/2486)) +* pre-commit: Upgrade black v25.1.0 and isort v6.0.0 ([#2514](https://github.com/openai/whisper/pull/2514)) +* GitHub Actions: Add Python 3.13 to the testing ([#2487](https://github.com/openai/whisper/pull/2487)) +* PEP 621: Migrate from setup.py to pyproject.toml ([#2435](https://github.com/openai/whisper/pull/2435)) +* pre-commit autoupdate && pre-commit run --all-files ([#2484](https://github.com/openai/whisper/pull/2484)) +* Upgrade GitHub Actions ([#2430](https://github.com/openai/whisper/pull/2430)) +* Bugfix: Illogical "Avoid computing higher temperatures on no_speech" ([#1903](https://github.com/openai/whisper/pull/1903)) +* Updating README and doc strings to reflect that n_mels can now be 128 ([#2049](https://github.com/openai/whisper/pull/2049)) +* fix typo 
data/README.md ([#2433](https://github.com/openai/whisper/pull/2433)) +* Update README.md ([#2379](https://github.com/openai/whisper/pull/2379)) +* Add option to carry initial_prompt with the sliding window ([#2343](https://github.com/openai/whisper/pull/2343)) +* more pytorch versions in tests ([#2408](https://github.com/openai/whisper/pull/2408)) + ## [v20240930](https://github.com/openai/whisper/releases/tag/v20240930) * allowing numpy 2 in tests ([#2362](https://github.com/openai/whisper/pull/2362)) diff --git a/whisper/version.py b/whisper/version.py index b4b3350..67426aa 100644 --- a/whisper/version.py +++ b/whisper/version.py @@ -1 +1 @@ -__version__ = "20240930" +__version__ = "20250625" From db7fbc75fe2369780cdcaf7c7ada2a684c47ed16 Mon Sep 17 00:00:00 2001 From: Jong Wook Kim Date: Wed, 25 Jun 2025 18:02:39 -0700 Subject: [PATCH 7/8] Release 20250625 --- .github/workflows/python-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 715c8eb..abc5356 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -17,7 +17,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.12' - name: Install dependencies run: | python -m pip install --upgrade pip From c0d2f624c09dc18e709e37c2ad90c039a4eb72a2 Mon Sep 17 00:00:00 2001 From: Jong Wook Kim Date: Wed, 25 Jun 2025 18:05:47 -0700 Subject: [PATCH 8/8] Release 20250625 --- .github/workflows/python-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index abc5356..ff8f122 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -21,7 +21,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install setuptools wheel twine + pip install setuptools 
wheel twine build - name: Release if: ${{ steps.regex-match.outputs.match != '' }} uses: softprops/action-gh-release@v2