From 6c1d8f1ea10b85ec0a0ed584edb5ad9c8efc3195 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sat, 4 Jan 2025 09:47:12 +0100 Subject: [PATCH 1/4] Upgrade GitHub Actions (#2430) --- .github/workflows/test.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 84b81cc..106c66b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,10 +11,10 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Fetch base branch run: git fetch origin ${{ github.base_ref }} - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: "3.8" architecture: x64 @@ -23,7 +23,7 @@ jobs: run: | echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT - name: pip/pre-commit cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: | ${{ steps.pip-cache.outputs.dir }} @@ -67,9 +67,9 @@ jobs: pytorch-version: 2.5.0 numpy-requirement: "'numpy'" steps: - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 - run: conda install -n test ffmpeg python=${{ matrix.python-version }} - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - run: echo "$CONDA/envs/test/bin" >> $GITHUB_PATH - run: pip3 install .["dev"] ${{ matrix.numpy-requirement }} torch==${{ matrix.pytorch-version }}+cpu --index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pypi.org/simple - run: pytest --durations=0 -vv -k 'not test_transcribe or test_transcribe[tiny] or test_transcribe[tiny.en]' -m 'not requires_cuda' From 26a7cacc83c2cfbbf743022da8331b29702ceedc Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sat, 4 Jan 2025 10:02:18 +0100 Subject: [PATCH 2/4] pre-commit autoupdate && pre-commit run --all-files (#2484) * pre-commit autoupdate && pre-commit run --all-files * Black formatter needs a current version of Python --- .github/workflows/test.yml | 4 ++-- .pre-commit-config.yaml | 8 ++++---- whisper/normalizers/basic.py | 22 +++++++++++++--------- whisper/utils.py | 8 +++++--- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 106c66b..16c7ff7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,7 +16,7 @@ jobs: run: git fetch origin ${{ github.base_ref }} - uses: actions/setup-python@v5 with: - python-version: "3.8" + python-version: "3.9" architecture: x64 - name: Get pip cache dir id: pip-cache @@ -33,7 +33,7 @@ jobs: ${{ runner.os }}-pip-pre-commit - name: pre-commit run: | - pip install -U pre-commit + pip install --upgrade pre-commit pre-commit install --install-hooks pre-commit run --all-files whisper-test: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3f5a74b..48df249 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 + rev: v5.0.0 hooks: - id: check-json - id: end-of-file-fixer @@ -11,17 +11,17 @@ repos: - id: check-added-large-files args: [--maxkb=4096] - repo: https://github.com/psf/black - rev: 23.7.0 + rev: 24.10.0 hooks: - id: black - repo: https://github.com/pycqa/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort name: isort (python) args: ["--profile", "black", "-l", "88", "--trailing-comma", "--multi-line", "3"] - repo: https://github.com/pycqa/flake8.git - rev: 6.0.0 + rev: 7.1.1 hooks: - id: flake8 types: [python] diff --git a/whisper/normalizers/basic.py b/whisper/normalizers/basic.py index a824032..8690ae7 100644 --- a/whisper/normalizers/basic.py +++ b/whisper/normalizers/basic.py @@ -30,15 +30,19 @@ def remove_symbols_and_diacritics(s: str, keep=""): and drop any diacritics (category 'Mn' and some manual mappings) """ return "".join( - c - if c in keep - else ADDITIONAL_DIACRITICS[c] - if c in ADDITIONAL_DIACRITICS - else "" - if unicodedata.category(c) == "Mn" - else " " - if unicodedata.category(c)[0] in "MSP" - else c + ( + c + if c in keep + else ( + ADDITIONAL_DIACRITICS[c] + if c in ADDITIONAL_DIACRITICS + else ( + "" + if unicodedata.category(c) == "Mn" + else " " if unicodedata.category(c)[0] in "MSP" else c + ) + ) + ) for c in unicodedata.normalize("NFKD", s) ) diff --git a/whisper/utils.py b/whisper/utils.py index 9b9b138..13792f7 100644 --- a/whisper/utils.py +++ b/whisper/utils.py @@ -209,9 +209,11 @@ class SubtitlesWriter(ResultWriter): yield start, end, "".join( [ - re.sub(r"^(\s*)(.*)$", r"\1\2", word) - if j == i - else word + ( + re.sub(r"^(\s*)(.*)$", r"\1\2", word) + if j == i + else word + ) for j, word in enumerate(all_words) ] ) From dd4d010d2c585bc70aeddd166cd3e26b0bb62f31 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sat, 4 Jan 2025 10:38:35 +0100 Subject: [PATCH 3/4] PEP 621: Migrate from setup.py to pyproject.toml (#2435) --- pyproject.toml | 48 +++++++++++++++++++++++++++++++++++++++++++++++- setup.py | 42 ------------------------------------------ 2 files changed, 47 insertions(+), 43 deletions(-) delete mode 100644 setup.py diff --git a/pyproject.toml b/pyproject.toml index 84637eb..21b90e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,50 @@ +[build-system] +build-backend = "setuptools.build_meta" + +requires = [ "setuptools>=61.2" ] + +[project] +name = "openai-whisper" +description = "Robust Speech Recognition via Large-Scale Weak Supervision" +readme.content-type = "text/markdown" +readme.file = "README.md" +license = { text = "MIT" } +authors = [ { name = "OpenAI" } ] +requires-python = ">=3.8" +classifiers = [ + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] +dynamic = [ "version" ] +dependencies = [ + "more-itertools", + "numba", + "numpy", + "tiktoken", + "torch", + "tqdm", + "triton>=2; (platform_machine=='x86_64' and sys_platform=='linux') or sys_platform=='linux2'", +] +optional-dependencies.dev = [ "black", "flake8", "isort", "pytest", "scipy" ] +urls = { Homepage = "https://github.com/openai/whisper" } +scripts.whisper = "whisper.transcribe:cli" + +[tool.setuptools] +py-modules = [ "whisper" ] +include-package-data = true + +[tool.setuptools.dynamic] +version = { attr = "whisper.version.__version__" } + +[tool.setuptools.packages.find] +exclude = [ "tests*" ] +namespaces = false + [tool.black] [tool.isort] @@ -5,4 +52,3 @@ profile = "black" include_trailing_comma = true line_length = 88 multi_line_output = 3 - diff --git a/setup.py b/setup.py deleted file mode 100644 index 73c4eb8..0000000 --- a/setup.py +++ /dev/null @@ -1,42 +0,0 @@ -import platform -import sys -from pathlib import Path - -import pkg_resources -from setuptools import find_packages, setup - - -def read_version(fname="whisper/version.py"): - exec(compile(open(fname, encoding="utf-8").read(), fname, "exec")) - return locals()["__version__"] - - -requirements = [] -if sys.platform.startswith("linux") and platform.machine() == "x86_64": - requirements.append("triton>=2.0.0") - -setup( - name="openai-whisper", - py_modules=["whisper"], - version=read_version(), - description="Robust Speech Recognition via Large-Scale Weak Supervision", - long_description=open("README.md", encoding="utf-8").read(), - long_description_content_type="text/markdown", - readme="README.md", - python_requires=">=3.8", - author="OpenAI", - url="https://github.com/openai/whisper", - license="MIT", - packages=find_packages(exclude=["tests*"]), - install_requires=[ - str(r) - for r in pkg_resources.parse_requirements( - Path(__file__).with_name("requirements.txt").open() - ) - ], - entry_points={ - "console_scripts": ["whisper=whisper.transcribe:cli"], - }, - include_package_data=True, - extras_require={"dev": ["pytest", "scipy", "black", "flake8", "isort"]}, -) From 517a43ecd132a2089d85f4ebc044728a71d49f6e Mon Sep 17 00:00:00 2001 From: Jong Wook Kim Date: Sat, 4 Jan 2025 12:56:16 -0800 Subject: [PATCH 4/4] Update python-publish.yml using `-m build --sdist` instead of `setup.py sdist` --- .github/workflows/python-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 4b91a2a..c868068 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -33,5 +33,5 @@ jobs: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} run: | - python setup.py sdist + python -m build --sdist twine upload dist/*