From 1caba7d5d46d8ee91376c7f25d376cf450ad5fde Mon Sep 17 00:00:00 2001 From: Kent Slaney Date: Mon, 22 Jul 2024 16:14:30 -0700 Subject: [PATCH] clarify transcription parameter --- jfk.json | 1 + jfk.srt | 4 ++++ jfk.tsv | 2 ++ jfk.txt | 1 + jfk.vtt | 5 +++++ whisper/transcribe.py | 7 ++++--- 6 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 jfk.json create mode 100644 jfk.srt create mode 100644 jfk.tsv create mode 100644 jfk.txt create mode 100644 jfk.vtt diff --git a/jfk.json b/jfk.json new file mode 100644 index 0000000..360fc47 --- /dev/null +++ b/jfk.json @@ -0,0 +1 @@ +{"segments": [{"id": 0, "seek": 0, "start": 0.0, "end": 11.0, "text": " And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.", "tokens": [50364, 400, 370, 452, 7177, 6280, 11, 1029, 406, 437, 428, 1941, 393, 360, 337, 291, 11, 1029, 437, 291, 393, 360, 337, 428, 1941, 13, 50914], "temperature": 0.0, "avg_logprob": -0.20427462032863072, "compression_ratio": 1.3544303797468353, "no_speech_prob": 0.04382958635687828}], "language": "en", "text": " And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country."} \ No newline at end of file diff --git a/jfk.srt b/jfk.srt new file mode 100644 index 0000000..a2c8946 --- /dev/null +++ b/jfk.srt @@ -0,0 +1,4 @@ +1 +00:00:00,000 --> 00:00:11,000 +And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country. + diff --git a/jfk.tsv b/jfk.tsv new file mode 100644 index 0000000..ad86260 --- /dev/null +++ b/jfk.tsv @@ -0,0 +1,2 @@ +start end text +0 11000 And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country. diff --git a/jfk.txt b/jfk.txt new file mode 100644 index 0000000..64b97d3 --- /dev/null +++ b/jfk.txt @@ -0,0 +1 @@ +And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country. diff --git a/jfk.vtt b/jfk.vtt new file mode 100644 index 0000000..ae50503 --- /dev/null +++ b/jfk.vtt @@ -0,0 +1,5 @@ +WEBVTT + +00:00.000 --> 00:11.000 +And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country. + diff --git a/whisper/transcribe.py b/whisper/transcribe.py index f66a146..a215833 100644 --- a/whisper/transcribe.py +++ b/whisper/transcribe.py @@ -769,14 +769,15 @@ def audio_tensor(audio: Union[str, np.ndarray, torch.Tensor]) -> torch.Tensor: class MinimalTranscriber(Transcriber): exact: bool = True - chlen: float = CHUNK_LENGTH + # amount of time per chunk that is considered in-context + contextualized: float = CHUNK_LENGTH async def process(self, stream: ArrayStream, **kw) -> dict: - data = await stream.request(self.chlen, self.exact) + data = await stream.request(CHUNK_LENGTH, self.exact) while data.shape[-1] > 0: self(data, stream.offset, True) t = ( - self.chlen + self.contextualized - (stream.offset + data.shape[-1] - self.seek) / FRAMES_PER_SECOND + CHUNK_LENGTH )