pre-commit autoupdate && pre-commit run --all-files (#2484)

* pre-commit autoupdate && pre-commit run --all-files
* Black formatter needs a current version of Python
2025-10-14 00:59:39 +00:00 · 2025-01-04 10:02:18 +01:00 · 2025-01-04 10:02:18 +01:00 · 26a7cacc83
commit 26a7cacc83
parent 6c1d8f1ea1
4 changed files with 24 additions and 18 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -16,7 +16,7 @@ jobs:
        run: git fetch origin ${{ github.base_ref }}
      - uses: actions/setup-python@v5
        with:
-          python-version: "3.8"
+          python-version: "3.9"
          architecture: x64
      - name: Get pip cache dir
        id: pip-cache
@@ -33,7 +33,7 @@ jobs:
            ${{ runner.os }}-pip-pre-commit
      - name: pre-commit
        run: |
-          pip install -U pre-commit
+          pip install --upgrade pre-commit
          pre-commit install --install-hooks
          pre-commit run --all-files
  whisper-test:
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.0.1
+    rev: v5.0.0
    hooks:
      - id: check-json
      - id: end-of-file-fixer
@@ -11,17 +11,17 @@ repos:
      - id: check-added-large-files
        args: [--maxkb=4096]
  - repo: https://github.com/psf/black
-    rev: 23.7.0
+    rev: 24.10.0
    hooks:
      - id: black
  - repo: https://github.com/pycqa/isort
-    rev: 5.12.0
+    rev: 5.13.2
    hooks:
      - id: isort
        name: isort (python)
        args: ["--profile", "black", "-l", "88", "--trailing-comma", "--multi-line", "3"]
  - repo: https://github.com/pycqa/flake8.git
-    rev: 6.0.0
+    rev: 7.1.1
    hooks:
      - id: flake8
        types: [python]
--- a/whisper/normalizers/basic.py
+++ b/whisper/normalizers/basic.py
@@ -30,15 +30,19 @@ def remove_symbols_and_diacritics(s: str, keep=""):
    and drop any diacritics (category 'Mn' and some manual mappings)
    """
    return "".join(
+        (
            c
            if c in keep
-        else ADDITIONAL_DIACRITICS[c]
+            else (
+                ADDITIONAL_DIACRITICS[c]
                if c in ADDITIONAL_DIACRITICS
-        else ""
+                else (
+                    ""
                    if unicodedata.category(c) == "Mn"
-        else " "
-        if unicodedata.category(c)[0] in "MSP"
-        else c
+                    else " " if unicodedata.category(c)[0] in "MSP" else c
+                )
+            )
+        )
        for c in unicodedata.normalize("NFKD", s)
    )

--- a/whisper/utils.py
+++ b/whisper/utils.py
@@ -209,9 +209,11 @@ class SubtitlesWriter(ResultWriter):

                        yield start, end, "".join(
                            [
+                                (
                                    re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word)
                                    if j == i
                                    else word
+                                )
                                for j, word in enumerate(all_words)
                            ]
                        )