mirror of
https://github.com/openai/whisper.git
synced 2025-11-24 06:26:03 +00:00
Implemented user interface for easier user access
This commit is contained in:
parent e144130250
commit a739db47bb
44 README.md
@@ -1,22 +1,38 @@
-## Personalized Features
+## Enhanced Features
 
-This fork of OpenAI's Whisper includes the following enhancements:
+This fork of OpenAI's Whisper includes several enhancements to improve file organization, user experience, and ease of transcription. Below is a summary of the new features:
 
-1. **Automated Folder Creation for Each Transcription Run**:
-   - Each time an audio file is transcribed, a unique folder is created under a parent folder named `Results`.
-   - This folder stores the transcription results, organized by audio file and timestamp.
-   - This makes it easy to review transcriptions from multiple audio files without overwriting previous results.
+### 1. **Automated Folder Creation for Each Transcription Run**
+- Each time an audio file is transcribed, a unique folder is created under a parent directory named `Results`.
+- The unique folder is named based on the original audio file name and a timestamp, e.g., `Results/[audio_file_name]_[timestamp]`.
+- This structure keeps transcription results organized and prevents overwriting, making it easy to manage and review multiple transcriptions.
 
-2. **New Script: `run_and_save.py`**:
-   - This script allows you to transcribe an audio file and automatically save the output in a structured directory.
-   - Usage:
-     ```bash
-     python run_and_save.py /path/to/your_audio_file.mp3 --model tiny
-     ```
-   - The output is saved in `Results/[audio_file_name]_[timestamp]/transcription.txt`.
-   - The `--model` argument lets you select the model size (options: `tiny`, `base`, `small`, `medium`, `large`).
+### 2. **Temporary File Storage for Uploaded Audio Files**
+- Uploaded audio files are stored temporarily in a folder named `TempUploads`.
+- This separation between original audio files and transcription results enhances organization and simplifies the process of clearing temporary files when they're no longer needed.
+
+### 3. **Interactive Web Interface: `app.py`**
+- The `app.py` script, built with Streamlit, serves as the main interface for Whisper. This web-based UI provides an intuitive way to interact with Whisper without needing the command line.
+- **Features**:
+  - **Upload Audio Files**: Supports various audio formats (e.g., MP3, WAV, M4A, MP4) and stores them temporarily in `TempUploads`.
+  - **Choose Model Size**: Allows users to select from Whisper model sizes (`tiny`, `base`, `small`, `medium`, `large`).
+  - **Organized Transcription Output**: Each transcription is saved in a unique folder under `Results`, with the transcription stored as `transcription.txt`.
+- **Usage**:
+  - First, install Streamlit if you haven't already:
+    ```bash
+    pip install streamlit
+    ```
+  - Then, run the app:
+    ```bash
+    streamlit run app.py
+    ```
+  - Open your browser and go to the provided URL (usually `http://localhost:8501`) to access the app.
 
-These enhancements make it easier to manage multiple transcription tasks, keep audio files and transcriptions organized, and provide an intuitive user interface for interacting with Whisper.
+---
+
+These updates make `app.py` the primary and streamlined interface for managing transcriptions with Whisper. Temporary files and organized results folders ensure clear file management, while the web UI allows users to interact with Whisper effortlessly.
 
 # Whisper
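For quick reference (not part of the committed files), the snippet below is a minimal sketch of the `Results/[audio_file_name]_[timestamp]` naming scheme the README describes; it mirrors the `create_output_folder` helper added in `app.py` later in this commit, and the example file name is borrowed from one of the `TempUploads` files added here.

```python
# Sketch only: reproduces the Results/[audio_file_name]_[timestamp] convention
# described above (same logic as create_output_folder in app.py).
import os
from datetime import datetime

audio_file = "DarknessHuntUs.mp3"  # example name, taken from an upload in this commit
stem = os.path.splitext(os.path.basename(audio_file))[0]
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
print(os.path.join("Results", f"{stem}_{timestamp}"))
# -> e.g. Results/DarknessHuntUs_20241113_101914
```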
1 Results/DarknessHuntUs_20241113_101914/transcription.txt Normal file
@@ -0,0 +1 @@
I fear darkness hunts us. Use what you've learned and stay the course.
BIN TempUploads/DarknessHuntUs.mp3 Normal file
Binary file not shown.
BIN TempUploads/DontForgetToSubscribe.mp3 Normal file
Binary file not shown.
BIN TempUploads/deathsounzombie.mp3 Normal file
Binary file not shown.
65 app.py Normal file
@@ -0,0 +1,65 @@
import streamlit as st
import os
from datetime import datetime
from whisper import load_model, transcribe

# Set up the app title and description
st.title("Whisper Audio Transcription")
st.write("Upload an audio file and choose a model to transcribe it using OpenAI's Whisper.")

# File uploader widget
uploaded_file = st.file_uploader("Choose an audio file...", type=["mp3", "wav", "m4a", "mp4"])

# Model selection widget
model_size = st.selectbox("Choose model size:", ["tiny", "base", "small", "medium", "large"])

# Define folders for temporary uploads and results
temp_upload_folder = "TempUploads"
results_folder = "Results"
os.makedirs(temp_upload_folder, exist_ok=True)  # Create TempUploads if it doesn't exist
os.makedirs(results_folder, exist_ok=True)      # Create Results if it doesn't exist

# Function to create a unique output folder for each transcription run
def create_output_folder(audio_file):
    # Use the audio file name (without extension) and a timestamp to create a unique folder name
    folder_name = os.path.splitext(os.path.basename(audio_file))[0]
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_folder = os.path.join(results_folder, f"{folder_name}_{timestamp}")

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)
    return output_folder

# Button to start transcription
if st.button("Transcribe"):
    if uploaded_file is not None:
        # Save the uploaded file temporarily with its original name in TempUploads
        temp_file_path = os.path.join(temp_upload_folder, uploaded_file.name)
        with open(temp_file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        # Load the chosen Whisper model
        model = load_model(model_size)

        # Create a unique folder for the transcription output in Results
        output_folder = create_output_folder(uploaded_file.name)

        # Run transcription
        try:
            result = transcribe(model, temp_file_path)

            # Save transcription to a text file in the output folder
            output_file = os.path.join(output_folder, "transcription.txt")
            with open(output_file, "w") as f:
                f.write(result["text"])

            # Display the transcription result in the app
            st.write("### Transcription Result")
            st.write(result["text"])
            st.write(f"Transcription saved to {output_file}")

        except Exception as e:
            st.write("An error occurred:", e)
    else:
        st.write("Please upload an audio file.")
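As an aside, the same transcription flow can be exercised without the Streamlit UI, which is handy for verifying the model call in isolation. The snippet below is a minimal sketch using the same `load_model`/`transcribe` calls as `app.py`; the audio path is illustrative, not a file in this commit.

```python
# Minimal, UI-free sketch of the transcription call used by app.py above.
# "TempUploads/example.mp3" is an illustrative path, not a file in this commit.
from whisper import load_model, transcribe

model = load_model("tiny")  # any of: tiny, base, small, medium, large
result = transcribe(model, "TempUploads/example.mp3")
print(result["text"])       # the same text app.py writes to transcription.txt
```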
@@ -1,59 +0,0 @@
import argparse
import os
from datetime import datetime
from whisper import load_model, transcribe  # Import necessary functions from Whisper
from colorama import Fore, Style


def create_output_folder(audio_file):
    # Base folder where all results folders will be created
    base_folder = "Results"

    # Use the audio file name (without extension) and a timestamp to create a unique folder name
    folder_name = os.path.splitext(os.path.basename(audio_file))[0]
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_folder = os.path.join(base_folder, f"{folder_name}_{timestamp}")

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)
    return output_folder


def main():
    parser = argparse.ArgumentParser(description="Transcribe audio files and save results to a unique folder.")
    parser.add_argument("file", type=str, help="Path to the audio file.")
    parser.add_argument("--model", choices=["tiny", "base", "small", "medium", "large"], default="tiny",
                        help="Choose the model size for transcription (default is 'tiny').")

    args = parser.parse_args()

    # Check if the audio file exists
    if not os.path.isfile(args.file):
        print(Fore.RED + f"Error: The file '{args.file}' does not exist." + Style.RESET_ALL)
        return

    # Load the Whisper model
    print(Fore.CYAN + f"Loading model '{args.model}'..." + Style.RESET_ALL)
    model = load_model(args.model)

    # Create a unique folder under "Results" for this run
    output_folder = create_output_folder(args.file)
    print(Fore.CYAN + f"Created folder for results: {output_folder}" + Style.RESET_ALL)

    # Run the transcription
    try:
        print(Fore.CYAN + f"Transcribing '{args.file}' using the '{args.model}' model..." + Style.RESET_ALL)
        result = transcribe(model, args.file)

        # Save transcription to a text file in the output folder
        output_file = os.path.join(output_folder, "transcription.txt")
        with open(output_file, "w") as f:
            f.write(result["text"])
        print(Fore.GREEN + f"Transcription completed successfully! Saved to {output_file}" + Style.RESET_ALL)

    except Exception as e:
        print(Fore.RED + f"An error occurred: {e}" + Style.RESET_ALL)


if __name__ == "__main__":
    main()
@@ -1,37 +0,0 @@
import argparse
import os
from whisper import load_model  # For loading the Whisper model
from whisper.transcribe import transcribe  # Import the transcribe function
from colorama import Fore, Style


def main():
    parser = argparse.ArgumentParser(description="Transcribe audio files using OpenAI's Whisper model.")
    parser.add_argument("file", type=str, help="Path to the audio file.")
    parser.add_argument("--model", choices=["tiny", "base", "small", "medium", "large"], default="tiny",
                        help="Choose the model size for transcription (default is 'tiny').")

    args = parser.parse_args()

    # Check if the audio file exists
    if not os.path.isfile(args.file):
        print(Fore.RED + f"Error: The file '{args.file}' does not exist." + Style.RESET_ALL)
        return

    print(Fore.CYAN + f"Transcribing '{args.file}' using the '{args.model}' model..." + Style.RESET_ALL)

    try:
        # Load the model
        model = load_model(args.model)

        # Transcribe the audio file
        result = transcribe(model, args.file)

        # Print the transcription result
        print(Fore.GREEN + "Transcription completed successfully!" + Style.RESET_ALL)
        print(result)

    except Exception as e:
        print(Fore.RED + f"An error occurred: {e}" + Style.RESET_ALL)


if __name__ == "__main__":
    main()