"""
Farsi Transcriber Backend API

Flask API for handling audio/video file transcription using Whisper model.
Configured for Railway deployment.
"""

import json
import os
import sys
import tempfile
import threading
from pathlib import Path

import whisper
from flask import Flask, request, jsonify
from flask_cors import CORS
from werkzeug.exceptions import HTTPException
from werkzeug.utils import secure_filename

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

app = Flask(__name__)
# NOTE(review): this CORS rule only matches paths under /api/, while every
# route defined in this file is mounted at the root. Confirm whether a proxy
# adds that prefix or whether the rule should cover the root routes too.
CORS(app, resources={r"/api/*": {"origins": "*"}})

# Configuration
UPLOAD_FOLDER = tempfile.gettempdir()
ALLOWED_EXTENSIONS = {
    'mp3', 'wav', 'm4a', 'flac', 'ogg', 'aac', 'wma',
    'mp4', 'mkv', 'mov', 'webm', 'avi', 'flv', 'wmv',
}
MAX_FILE_SIZE = 500 * 1024 * 1024  # 500MB
# Single source of truth for the model loaded by load_model() and the one
# reported by the /models endpoint.
WHISPER_MODEL_NAME = 'medium'

# Production settings
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = MAX_FILE_SIZE
app.config['ENV'] = os.getenv('FLASK_ENV', 'production')

# Load Whisper model (lazy load for faster startup)
model = None
# The development server below runs with threaded=True and both /health and
# /transcribe call load_model(), so the first load is serialized to avoid
# loading the model more than once.
_model_lock = threading.Lock()


def load_model():
    """
    Lazy load Whisper model on first use

    Returns:
        - the cached model, or None when loading failed (the error is printed
          and the next call retries)
    """
    global model
    if model is None:
        with _model_lock:
            # Re-check: another thread may have finished loading while this
            # one was waiting for the lock.
            if model is None:
                try:
                    print("Loading Whisper model...")
                    model = whisper.load_model(WHISPER_MODEL_NAME)
                    print("✓ Whisper model loaded successfully")
                except Exception as e:
                    print(f"✗ Error loading Whisper model: {e}")
                    model = None
    return model


def _file_extension(filename):
    """Return the lower-cased text after the last '.', or '' if there is none"""
    if '.' not in filename:
        return ''
    return filename.rsplit('.', 1)[1].lower()


def allowed_file(filename):
    """Check if file has allowed extension"""
    return '.' in filename and _file_extension(filename) in ALLOWED_EXTENSIONS


def _format_timestamp(seconds):
    """
    Format a time offset in seconds as HH:MM:SS.mmm

    The value is converted to whole milliseconds once (rounded) and split with
    integer arithmetic, so binary float error cannot drop a millisecond
    (e.g. 1.001 -> '00:00:01.001'). Negative inputs are clamped to zero.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3600 * 1000)
    minutes, remainder = divmod(remainder, 60 * 1000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"


@app.route('/', methods=['GET'])
def index():
    """Root endpoint"""
    return jsonify({
        'message': 'Farsi Transcriber API',
        'version': '1.0.0',
        'status': 'running'
    })


@app.route('/health', methods=['GET'])
def health():
    """
    Health check endpoint

    Calls load_model(), so the first health check triggers the model load.
    'status' is always 'healthy'; 'model_loaded' reports whether the load
    succeeded.
    """
    model_status = load_model()
    return jsonify({
        'status': 'healthy',
        'model_loaded': model_status is not None,
        'environment': app.config['ENV']
    })


@app.route('/transcribe', methods=['POST'])
def transcribe():
    """
    Transcribe audio/video file

    Request:
        - file: Audio/video file
        - language: Language code (default: 'fa' for Farsi)

    Response:
        - transcription results with segments and timestamps
        - 400 when the file is missing, unnamed, or has a disallowed extension
        - the HTTP error's own status when the request itself is rejected
          (e.g. a body larger than MAX_CONTENT_LENGTH)
        - 500 when the model cannot be loaded or transcription fails
    """
    try:
        # Load model if not already loaded
        whisper_model = load_model()
        if whisper_model is None:
            return jsonify({'error': 'Failed to load Whisper model'}), 500

        # Check if file is in request
        if 'file' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        file = request.files['file']

        if file.filename == '':
            return jsonify({'error': 'No file selected'}), 400

        if not allowed_file(file.filename):
            return jsonify({'error': 'File type not allowed'}), 400

        # Sanitized name is only echoed back in the response; it is not used
        # as the on-disk path because secure_filename() reduces names to ASCII
        # (a Farsi file name can collapse to just its extension) and two
        # concurrent uploads with the same name would share one path.
        filename = secure_filename(file.filename)

        # Get language code from request (default: Farsi)
        language = request.form.get('language', 'fa')

        # Save file under a unique temporary name that keeps the validated
        # extension.
        fd, filepath = tempfile.mkstemp(
            suffix='.' + _file_extension(file.filename),
            dir=app.config['UPLOAD_FOLDER'],
        )
        os.close(fd)  # file.save() reopens the path itself
        try:
            file.save(filepath)

            # Transcribe
            result = whisper_model.transcribe(filepath, language=language, verbose=False)
        finally:
            # Clean up uploaded file, also when saving or transcribing failed
            try:
                os.remove(filepath)
            except OSError as cleanup_error:
                app.logger.warning("Could not remove %s: %s", filepath, cleanup_error)

        # Format response
        segments = [
            {
                'start': _format_timestamp(segment['start']),
                'end': _format_timestamp(segment['end']),
                'text': segment['text'].strip(),
            }
            for segment in result.get('segments', [])
        ]

        return jsonify({
            'status': 'success',
            'filename': filename,
            'language': result.get('language', 'unknown'),
            'text': result.get('text', ''),
            'segments': segments
        })

    except HTTPException as e:
        # Keep the real status of request-level errors instead of masking
        # them as a 500.
        return jsonify({'error': e.description}), e.code or 500
    except Exception as e:
        app.logger.exception("Transcription failed")
        return jsonify({'error': str(e)}), 500


@app.route('/models', methods=['GET'])
def get_models():
    """Get available Whisper models"""
    return jsonify({
        'available_models': ['tiny', 'base', 'small', 'medium', 'large'],
        'current_model': WHISPER_MODEL_NAME,
        'description': 'List of available Whisper models. Larger models are more accurate but slower.'
    })


@app.route('/export', methods=['POST'])
def export():
    """
    Export transcription in specified format

    Request:
        - transcription: Full transcription text
        - segments: Array of segments with timestamps
        - format: Export format (txt, srt, vtt, json)

    Response:
        - Exported file content
        - 400 when the body is not a JSON object, the format is unsupported,
          or the segments lack 'start' / 'end' / 'text'
    """
    try:
        # silent=True yields None for a missing or unparsable body instead of
        # raising, so a bad request can be answered with a 400.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        transcription = data.get('transcription', '')
        segments = data.get('segments', [])
        # str() guards against a non-string 'format' value (e.g. null).
        format_type = str(data.get('format', 'txt')).lower()

        try:
            if format_type == 'txt':
                content = transcription
                mime_type = 'text/plain'
            elif format_type == 'srt':
                content = _format_srt(segments)
                mime_type = 'text/plain'
            elif format_type == 'vtt':
                content = _format_vtt(segments)
                mime_type = 'text/plain'
            elif format_type == 'json':
                content = json.dumps(
                    {'text': transcription, 'segments': segments},
                    ensure_ascii=False,
                    indent=2,
                )
                mime_type = 'application/json'
            else:
                return jsonify({'error': 'Unsupported format'}), 400
        except (KeyError, TypeError) as e:
            # Malformed segments are a client error, not a server failure.
            return jsonify({'error': f'Invalid segments: {e}'}), 400

        return jsonify({
            'status': 'success',
            'format': format_type,
            'content': content,
            'mime_type': mime_type
        })

    except Exception as e:
        app.logger.exception("Export failed")
        return jsonify({'error': str(e)}), 500


def _format_srt(segments):
    """
    Format transcription as SRT subtitle format

    Each segment needs 'start', 'end' and 'text'. SRT separates seconds from
    milliseconds with a comma, so the '.' used in this API's timestamps is
    converted.
    """
    lines = []
    for i, segment in enumerate(segments, 1):
        start = str(segment['start']).replace('.', ',')
        end = str(segment['end']).replace('.', ',')
        lines.append(str(i))
        lines.append(f"{start} --> {end}")
        lines.append(segment['text'])
        lines.append('')
    return '\n'.join(lines)


def _format_vtt(segments):
    """
    Format transcription as WebVTT subtitle format

    Each segment needs 'start', 'end' and 'text'. WebVTT separates seconds
    from milliseconds with a period, so a comma-separated timestamp is
    normalized.
    """
    lines = ['WEBVTT', '']
    for segment in segments:
        start = str(segment['start']).replace(',', '.')
        end = str(segment['end']).replace(',', '.')
        lines.append(f"{start} --> {end}")
        lines.append(segment['text'])
        lines.append('')
    return '\n'.join(lines)


if __name__ == '__main__':
    port = int(os.getenv('PORT', 5000))
    debug = os.getenv('FLASK_ENV', 'production') == 'development'
    app.run(debug=debug, host='0.0.0.0', port=port, threaded=True)