clarify transcription parameter

This commit is contained in:
Kent Slaney 2024-07-22 16:14:30 -07:00
parent 092cb3409e
commit 1caba7d5d4
6 changed files with 17 additions and 3 deletions

1
jfk.json Normal file
View File

@ -0,0 +1 @@
{"segments": [{"id": 0, "seek": 0, "start": 0.0, "end": 11.0, "text": " And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.", "tokens": [50364, 400, 370, 452, 7177, 6280, 11, 1029, 406, 437, 428, 1941, 393, 360, 337, 291, 11, 1029, 437, 291, 393, 360, 337, 428, 1941, 13, 50914], "temperature": 0.0, "avg_logprob": -0.20427462032863072, "compression_ratio": 1.3544303797468353, "no_speech_prob": 0.04382958635687828}], "language": "en", "text": " And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country."}

4
jfk.srt Normal file
View File

@ -0,0 +1,4 @@
1
00:00:00,000 --> 00:00:11,000
And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.

2
jfk.tsv Normal file
View File

@ -0,0 +1,2 @@
start end text
0 11000 And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.

1
jfk.txt Normal file
View File

@ -0,0 +1 @@
And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.

5
jfk.vtt Normal file
View File

@ -0,0 +1,5 @@
WEBVTT
00:00.000 --> 00:11.000
And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.

View File

@ -769,14 +769,15 @@ def audio_tensor(audio: Union[str, np.ndarray, torch.Tensor]) -> torch.Tensor:
class MinimalTranscriber(Transcriber): class MinimalTranscriber(Transcriber):
exact: bool = True exact: bool = True
chlen: float = CHUNK_LENGTH # amount of time per chunk that is considered in-context
contextualized: float = CHUNK_LENGTH
async def process(self, stream: ArrayStream, **kw) -> dict: async def process(self, stream: ArrayStream, **kw) -> dict:
data = await stream.request(self.chlen, self.exact) data = await stream.request(CHUNK_LENGTH, self.exact)
while data.shape[-1] > 0: while data.shape[-1] > 0:
self(data, stream.offset, True) self(data, stream.offset, True)
t = ( t = (
self.chlen self.contextualized
- (stream.offset + data.shape[-1] - self.seek) / FRAMES_PER_SECOND - (stream.offset + data.shape[-1] - self.seek) / FRAMES_PER_SECOND
+ CHUNK_LENGTH + CHUNK_LENGTH
) )