mirror of
https://github.com/openai/whisper.git
synced 2025-11-24 06:26:03 +00:00
Backend Updates: - Add lazy loading for Whisper model (faster startup) - Use environment variables for port and config - Add root endpoint for health checking - Configure CORS for production - Add tempfile support for uploads - Update to support gunicorn production server - Add Procfile for Heroku/Railway compatibility Frontend Updates: - Optimize Vite build configuration - Add production build optimizations - Enable minification and code splitting - Configure preview server for production Configuration: - Add .env.example files for both frontend and backend - Create railway.toml for Railway deployment - Add Procfile for process management - Setup environment variable templates Documentation: - Create comprehensive RAILWAY_DEPLOYMENT.md guide - Include step-by-step deployment instructions - Add troubleshooting section - Include cost breakdown - Add monitoring and maintenance guide Dependencies: - Add gunicorn for production WSGI server Ready for Railway deployment with: - Free $5/month credit - Automatic scaling - 24/7 uptime - Custom domain support (optional)
225 lines
6.6 KiB
Python
225 lines
6.6 KiB
Python
"""
|
|
Farsi Transcriber Backend API
|
|
|
|
Flask API for handling audio/video file transcription using Whisper model.
|
|
Configured for Railway deployment.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
from pathlib import Path
|
|
from werkzeug.utils import secure_filename
|
|
import whisper
|
|
from flask import Flask, request, jsonify
|
|
from flask_cors import CORS
|
|
|
|
# Make the directory three levels above this file importable.
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

app = Flask(__name__)
# Every route in this module is registered at the root ("/", "/health",
# "/transcribe", "/models", "/export"); none lives under "/api/". Flask-CORS
# matches the resource pattern against the request path, so the pattern must
# cover all paths or no CORS headers are ever emitted.
CORS(app, resources={r"/*": {"origins": "*"}})

# Configuration
UPLOAD_FOLDER = tempfile.gettempdir()  # uploads are written to the system temp dir
ALLOWED_EXTENSIONS = {'mp3', 'wav', 'm4a', 'flac', 'ogg', 'aac', 'wma', 'mp4', 'mkv', 'mov', 'webm', 'avi', 'flv', 'wmv'}
MAX_FILE_SIZE = 500 * 1024 * 1024  # 500MB

# Production settings
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = MAX_FILE_SIZE
app.config['ENV'] = os.getenv('FLASK_ENV', 'production')

# Whisper model handle; stays None until load_model() populates it
# (lazy load for faster startup).
model = None
|
|
|
|
def load_model():
    """Return the shared Whisper model, loading it on first use.

    The loaded model is cached in the module-level ``model`` variable so
    later calls return immediately. If loading fails, the error is printed
    and ``None`` is returned; the next call will try again.
    """
    global model

    # Fast path: a previous call already loaded the model.
    if model is not None:
        return model

    try:
        print("Loading Whisper model...")
        loaded = whisper.load_model('medium')
        print("✓ Whisper model loaded successfully")
    except Exception as exc:
        print(f"✗ Error loading Whisper model: {exc}")
        loaded = None

    model = loaded
    return model
|
|
|
|
|
|
def allowed_file(filename):
    """Return True when *filename* ends in an extension from ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    Names without a dot are rejected.
    """
    _stem, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
|
|
|
|
|
|
@app.route('/', methods=['GET'])
def index():
    """Root endpoint: return a static JSON banner describing the service."""
    banner = {
        'message': 'Farsi Transcriber API',
        'version': '1.0.0',
        'status': 'running',
    }
    return jsonify(banner)
|
|
|
|
|
|
@app.route('/health', methods=['GET'])
def health():
    """Health check endpoint.

    Calls ``load_model()`` (which loads the model if it is not loaded yet)
    and reports whether a model is available, together with the configured
    environment name.
    """
    is_loaded = load_model() is not None
    report = {
        'status': 'healthy',
        'model_loaded': is_loaded,
        'environment': app.config['ENV'],
    }
    return jsonify(report)
|
|
|
|
|
|
def _format_timestamp(seconds):
    """Render an offset in seconds as ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds first so binary floating-point
    error cannot shave a millisecond off (e.g. 2.3 s -> ``00:00:02.300``).
    """
    total_ms = int(round(float(seconds) * 1000))
    hours, remainder = divmod(total_ms, 3600 * 1000)
    minutes, remainder = divmod(remainder, 60 * 1000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"


@app.route('/transcribe', methods=['POST'])
def transcribe():
    """
    Transcribe audio/video file

    Request (multipart form):
    - file: Audio/video file
    - language: Language code (default: 'fa' for Farsi)

    Response:
    - JSON with 'status', 'filename', 'language', 'text' and 'segments',
      where each segment has 'start'/'end' as HH:MM:SS.mmm strings and 'text'.

    Errors:
    - 400 when no file is provided/selected or the extension is not allowed
    - the HTTP error's own status code when Flask/Werkzeug rejects the
      request while parsing it (e.g. payload too large)
    - 500 when the model cannot be loaded or transcription fails
    """
    # Imported locally so this handler is self-contained.
    from werkzeug.exceptions import HTTPException

    try:
        # Validate the request first: these checks are cheap, whereas loading
        # the model on a cold start is slow.
        if 'file' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        file = request.files['file']

        if file.filename == '':
            return jsonify({'error': 'No file selected'}), 400

        if not allowed_file(file.filename):
            return jsonify({'error': 'File type not allowed'}), 400

        # Load model if not already loaded
        whisper_model = load_model()
        if not whisper_model:
            return jsonify({'error': 'Failed to load Whisper model'}), 500

        # Sanitised name is only echoed back to the client; it is NOT used as
        # the on-disk name because secure_filename() strips non-ASCII
        # characters (a Farsi file name can collapse to just "mp3" or to an
        # empty string) and identical names from concurrent uploads would
        # overwrite each other.
        filename = secure_filename(file.filename)

        # allowed_file() has already verified that the name contains a dot and
        # that the extension is in the allow-list, so it is safe to reuse.
        extension = file.filename.rsplit('.', 1)[1].lower()
        fd, filepath = tempfile.mkstemp(
            suffix=f'.{extension}',
            dir=app.config['UPLOAD_FOLDER'],
        )
        os.close(fd)  # file.save() reopens the path itself

        try:
            file.save(filepath)

            # Get language code from request (default: Farsi)
            language = request.form.get('language', 'fa')

            # Transcribe
            result = whisper_model.transcribe(filepath, language=language, verbose=False)
        finally:
            # Always remove the upload, including when transcription fails.
            try:
                os.remove(filepath)
            except OSError:
                pass  # best-effort cleanup; the file may already be gone

        # Format response
        segments = [
            {
                'start': _format_timestamp(segment['start']),
                'end': _format_timestamp(segment['end']),
                'text': segment['text'].strip(),
            }
            for segment in result.get('segments', [])
        ]

        return jsonify({
            'status': 'success',
            'filename': filename,
            'language': result.get('language', 'unknown'),
            'text': result.get('text', ''),
            'segments': segments
        })

    except HTTPException as e:
        # Keep the real HTTP status (e.g. 413) instead of masking it as 500.
        return jsonify({'error': e.description}), e.code or 500
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
@app.route('/models', methods=['GET'])
def get_models():
    """Return the list of Whisper model names and the one reported as current."""
    model_names = ['tiny', 'base', 'small', 'medium', 'large']
    info = {
        'available_models': model_names,
        'current_model': 'medium',
        'description': 'List of available Whisper models. Larger models are more accurate but slower.',
    }
    return jsonify(info)
|
|
|
|
|
|
@app.route('/export', methods=['POST'])
def export():
    """
    Export transcription in specified format

    Request (JSON object):
    - transcription: Full transcription text
    - segments: Array of segments with 'start', 'end' and 'text'
    - format: Export format (txt, srt, vtt, json); defaults to txt

    Response:
    - JSON with 'status', 'format', 'content' (the exported text) and
      'mime_type'

    Errors:
    - 400 when the body is not a JSON object, the format is unsupported, or
      the segments are malformed
    - 500 for any other failure
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so a bad request is reported as 400 rather than falling into the
    # generic 500 handler below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    try:
        transcription = data.get('transcription', '')
        segments = data.get('segments', [])
        # str() guards against a non-string "format" value (e.g. null or a number).
        format_type = str(data.get('format', 'txt')).lower()

        if format_type == 'txt':
            content = transcription
            mime_type = 'text/plain'
        elif format_type == 'srt':
            content = _format_srt(segments)
            mime_type = 'text/plain'
        elif format_type == 'vtt':
            content = _format_vtt(segments)
            mime_type = 'text/plain'
        elif format_type == 'json':
            import json
            content = json.dumps({'text': transcription, 'segments': segments}, ensure_ascii=False, indent=2)
            mime_type = 'application/json'
        else:
            return jsonify({'error': 'Unsupported format'}), 400

        return jsonify({
            'status': 'success',
            'format': format_type,
            'content': content,
            'mime_type': mime_type
        })

    except (KeyError, TypeError) as e:
        # Segments missing 'start'/'end'/'text', or of the wrong shape, are a
        # client error rather than a server fault.
        return jsonify({'error': f'Invalid segment data: {e}'}), 400
    except Exception as e:
        return jsonify({'error': str(e)}), 500
|
|
|
|
|
|
def _format_srt(segments):
|
|
"""Format transcription as SRT subtitle format"""
|
|
lines = []
|
|
for i, segment in enumerate(segments, 1):
|
|
lines.append(str(i))
|
|
lines.append(f"{segment['start']} --> {segment['end']}")
|
|
lines.append(segment['text'])
|
|
lines.append('')
|
|
return '\n'.join(lines)
|
|
|
|
|
|
def _format_vtt(segments):
|
|
"""Format transcription as WebVTT subtitle format"""
|
|
lines = ['WEBVTT', '']
|
|
for segment in segments:
|
|
lines.append(f"{segment['start']} --> {segment['end']}")
|
|
lines.append(segment['text'])
|
|
lines.append('')
|
|
return '\n'.join(lines)
|
|
|
|
|
|
if __name__ == '__main__':
    # Development entry point: debug mode is enabled only when FLASK_ENV is
    # exactly 'development'; the port comes from PORT (default 5000).
    is_development = os.getenv('FLASK_ENV', 'production') == 'development'
    listen_port = int(os.getenv('PORT', 5000))
    app.run(
        debug=is_development,
        host='0.0.0.0',
        port=listen_port,
        threaded=True,
    )
|