From 1caba7d5d46d8ee91376c7f25d376cf450ad5fde Mon Sep 17 00:00:00 2001
From: Kent Slaney <kent@slaney.org>
Date: Mon, 22 Jul 2024 16:14:30 -0700
Subject: [PATCH] clarify transcription parameter

---
 jfk.json              | 1 +
 jfk.srt               | 4 ++++
 jfk.tsv               | 2 ++
 jfk.txt               | 1 +
 jfk.vtt               | 5 +++++
 whisper/transcribe.py | 7 ++++---
 6 files changed, 17 insertions(+), 3 deletions(-)
 create mode 100644 jfk.json
 create mode 100644 jfk.srt
 create mode 100644 jfk.tsv
 create mode 100644 jfk.txt
 create mode 100644 jfk.vtt

diff --git a/jfk.json b/jfk.json
new file mode 100644
index 0000000..360fc47
--- /dev/null
+++ b/jfk.json
@@ -0,0 +1 @@
+{"segments": [{"id": 0, "seek": 0, "start": 0.0, "end": 11.0, "text": " And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.", "tokens": [50364, 400, 370, 452, 7177, 6280, 11, 1029, 406, 437, 428, 1941, 393, 360, 337, 291, 11, 1029, 437, 291, 393, 360, 337, 428, 1941, 13, 50914], "temperature": 0.0, "avg_logprob": -0.20427462032863072, "compression_ratio": 1.3544303797468353, "no_speech_prob": 0.04382958635687828}], "language": "en", "text": " And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country."}
\ No newline at end of file
diff --git a/jfk.srt b/jfk.srt
new file mode 100644
index 0000000..a2c8946
--- /dev/null
+++ b/jfk.srt
@@ -0,0 +1,4 @@
+1
+00:00:00,000 --> 00:00:11,000
+And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
+
diff --git a/jfk.tsv b/jfk.tsv
new file mode 100644
index 0000000..ad86260
--- /dev/null
+++ b/jfk.tsv
@@ -0,0 +1,2 @@
+start	end	text
+0	11000	And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
diff --git a/jfk.txt b/jfk.txt
new file mode 100644
index 0000000..64b97d3
--- /dev/null
+++ b/jfk.txt
@@ -0,0 +1 @@
+And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
diff --git a/jfk.vtt b/jfk.vtt
new file mode 100644
index 0000000..ae50503
--- /dev/null
+++ b/jfk.vtt
@@ -0,0 +1,5 @@
+WEBVTT
+
+00:00.000 --> 00:11.000
+And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
+
diff --git a/whisper/transcribe.py b/whisper/transcribe.py
index f66a146..a215833 100644
--- a/whisper/transcribe.py
+++ b/whisper/transcribe.py
@@ -769,14 +769,15 @@ def audio_tensor(audio: Union[str, np.ndarray, torch.Tensor]) -> torch.Tensor:
 
 class MinimalTranscriber(Transcriber):
     exact: bool = True
-    chlen: float = CHUNK_LENGTH
+    # amount of time (in seconds) per chunk that is considered in-context
+    contextualized: float = CHUNK_LENGTH
 
     async def process(self, stream: ArrayStream, **kw) -> dict:
-        data = await stream.request(self.chlen, self.exact)
+        data = await stream.request(CHUNK_LENGTH, self.exact)
         while data.shape[-1] > 0:
             self(data, stream.offset, True)
             t = (
-                self.chlen
+                self.contextualized
                 - (stream.offset + data.shape[-1] - self.seek) / FRAMES_PER_SECOND
                 + CHUNK_LENGTH
             )