mirror of
https://github.com/openai/whisper.git
synced 2025-03-30 14:28:27 +00:00
Merge branch 'main' into patch-1
This commit is contained in:
commit
bab8297000
2
.github/workflows/python-publish.yml
vendored
2
.github/workflows/python-publish.yml
vendored
@@ -33,5 +33,5 @@ jobs:
|
||||
TWINE_USERNAME: __token__
|
||||
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
|
||||
run: |
|
||||
python setup.py sdist
|
||||
python -m build --sdist
|
||||
twine upload dist/*
|
||||
|
14
.github/workflows/test.yml
vendored
14
.github/workflows/test.yml
vendored
@@ -11,19 +11,19 @@ jobs:
|
||||
pre-commit:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Fetch base branch
|
||||
run: git fetch origin ${{ github.base_ref }}
|
||||
- uses: actions/setup-python@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.8"
|
||||
python-version: "3.9"
|
||||
architecture: x64
|
||||
- name: Get pip cache dir
|
||||
id: pip-cache
|
||||
run: |
|
||||
echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
|
||||
- name: pip/pre-commit cache
|
||||
uses: actions/cache@v3
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
${{ steps.pip-cache.outputs.dir }}
|
||||
@@ -33,7 +33,7 @@ jobs:
|
||||
${{ runner.os }}-pip-pre-commit
|
||||
- name: pre-commit
|
||||
run: |
|
||||
pip install -U pre-commit
|
||||
pip install --upgrade pre-commit
|
||||
pre-commit install --install-hooks
|
||||
pre-commit run --all-files
|
||||
whisper-test:
|
||||
@@ -67,9 +67,9 @@ jobs:
|
||||
pytorch-version: 2.5.0
|
||||
numpy-requirement: "'numpy'"
|
||||
steps:
|
||||
- uses: conda-incubator/setup-miniconda@v2
|
||||
- uses: conda-incubator/setup-miniconda@v3
|
||||
- run: conda install -n test ffmpeg python=${{ matrix.python-version }}
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- run: echo "$CONDA/envs/test/bin" >> $GITHUB_PATH
|
||||
- run: pip3 install .["dev"] ${{ matrix.numpy-requirement }} torch==${{ matrix.pytorch-version }}+cpu --index-url https://download.pytorch.org/whl/cpu --extra-index-url https://pypi.org/simple
|
||||
- run: pytest --durations=0 -vv -k 'not test_transcribe or test_transcribe[tiny] or test_transcribe[tiny.en]' -m 'not requires_cuda'
|
||||
|
@@ -1,6 +1,6 @@
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.0.1
|
||||
rev: v5.0.0
|
||||
hooks:
|
||||
- id: check-json
|
||||
- id: end-of-file-fixer
|
||||
@@ -11,17 +11,17 @@ repos:
|
||||
- id: check-added-large-files
|
||||
args: [--maxkb=4096]
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 23.7.0
|
||||
rev: 24.10.0
|
||||
hooks:
|
||||
- id: black
|
||||
- repo: https://github.com/pycqa/isort
|
||||
rev: 5.12.0
|
||||
rev: 5.13.2
|
||||
hooks:
|
||||
- id: isort
|
||||
name: isort (python)
|
||||
args: ["--profile", "black", "-l", "88", "--trailing-comma", "--multi-line", "3"]
|
||||
- repo: https://github.com/pycqa/flake8.git
|
||||
rev: 6.0.0
|
||||
rev: 7.1.1
|
||||
hooks:
|
||||
- id: flake8
|
||||
types: [python]
|
||||
|
@@ -1,3 +1,50 @@
|
||||
[build-system]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
requires = [ "setuptools>=61.2" ]
|
||||
|
||||
[project]
|
||||
name = "openai-whisper"
|
||||
description = "Robust Speech Recognition via Large-Scale Weak Supervision"
|
||||
readme.content-type = "text/markdown"
|
||||
readme.file = "README.md"
|
||||
license = { text = "MIT" }
|
||||
authors = [ { name = "OpenAI" } ]
|
||||
requires-python = ">=3.8"
|
||||
classifiers = [
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
]
|
||||
dynamic = [ "version" ]
|
||||
dependencies = [
|
||||
"more-itertools",
|
||||
"numba",
|
||||
"numpy",
|
||||
"tiktoken",
|
||||
"torch",
|
||||
"tqdm",
|
||||
"triton>=2; (platform_machine=='x86_64' and sys_platform=='linux') or sys_platform=='linux2'",
|
||||
]
|
||||
optional-dependencies.dev = [ "black", "flake8", "isort", "pytest", "scipy" ]
|
||||
urls = { Homepage = "https://github.com/openai/whisper" }
|
||||
scripts.whisper = "whisper.transcribe:cli"
|
||||
|
||||
[tool.setuptools]
|
||||
py-modules = [ "whisper" ]
|
||||
include-package-data = true
|
||||
|
||||
[tool.setuptools.dynamic]
|
||||
version = { attr = "whisper.version.__version__" }
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
exclude = [ "tests*" ]
|
||||
namespaces = false
|
||||
|
||||
[tool.black]
|
||||
|
||||
[tool.isort]
|
||||
@@ -5,4 +52,3 @@ profile = "black"
|
||||
include_trailing_comma = true
|
||||
line_length = 88
|
||||
multi_line_output = 3
|
||||
|
||||
|
42
setup.py
42
setup.py
@@ -1,42 +0,0 @@
|
||||
import platform
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pkg_resources
|
||||
from setuptools import find_packages, setup
|
||||
|
||||
|
||||
def read_version(fname="whisper/version.py"):
|
||||
exec(compile(open(fname, encoding="utf-8").read(), fname, "exec"))
|
||||
return locals()["__version__"]
|
||||
|
||||
|
||||
requirements = []
|
||||
if sys.platform.startswith("linux") and platform.machine() == "x86_64":
|
||||
requirements.append("triton>=2.0.0")
|
||||
|
||||
setup(
|
||||
name="openai-whisper",
|
||||
py_modules=["whisper"],
|
||||
version=read_version(),
|
||||
description="Robust Speech Recognition via Large-Scale Weak Supervision",
|
||||
long_description=open("README.md", encoding="utf-8").read(),
|
||||
long_description_content_type="text/markdown",
|
||||
readme="README.md",
|
||||
python_requires=">=3.8",
|
||||
author="OpenAI",
|
||||
url="https://github.com/openai/whisper",
|
||||
license="MIT",
|
||||
packages=find_packages(exclude=["tests*"]),
|
||||
install_requires=[
|
||||
str(r)
|
||||
for r in pkg_resources.parse_requirements(
|
||||
Path(__file__).with_name("requirements.txt").open()
|
||||
)
|
||||
],
|
||||
entry_points={
|
||||
"console_scripts": ["whisper=whisper.transcribe:cli"],
|
||||
},
|
||||
include_package_data=True,
|
||||
extras_require={"dev": ["pytest", "scipy", "black", "flake8", "isort"]},
|
||||
)
|
@@ -30,15 +30,19 @@ def remove_symbols_and_diacritics(s: str, keep=""):
|
||||
and drop any diacritics (category 'Mn' and some manual mappings)
|
||||
"""
|
||||
return "".join(
|
||||
c
|
||||
if c in keep
|
||||
else ADDITIONAL_DIACRITICS[c]
|
||||
if c in ADDITIONAL_DIACRITICS
|
||||
else ""
|
||||
if unicodedata.category(c) == "Mn"
|
||||
else " "
|
||||
if unicodedata.category(c)[0] in "MSP"
|
||||
else c
|
||||
(
|
||||
c
|
||||
if c in keep
|
||||
else (
|
||||
ADDITIONAL_DIACRITICS[c]
|
||||
if c in ADDITIONAL_DIACRITICS
|
||||
else (
|
||||
""
|
||||
if unicodedata.category(c) == "Mn"
|
||||
else " " if unicodedata.category(c)[0] in "MSP" else c
|
||||
)
|
||||
)
|
||||
)
|
||||
for c in unicodedata.normalize("NFKD", s)
|
||||
)
|
||||
|
||||
|
@@ -209,9 +209,11 @@ class SubtitlesWriter(ResultWriter):
|
||||
|
||||
yield start, end, "".join(
|
||||
[
|
||||
re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word)
|
||||
if j == i
|
||||
else word
|
||||
(
|
||||
re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word)
|
||||
if j == i
|
||||
else word
|
||||
)
|
||||
for j, word in enumerate(all_words)
|
||||
]
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user