"""
Farsi Transcriber Backend API

Flask API for handling audio/video file transcription using Whisper model.
"""

import os
import sys
from pathlib import Path
from werkzeug.utils import secure_filename
import whisper
from flask import Flask, request, jsonify
from flask_cors import CORS

# Put the directory three levels above this file on the import path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

app = Flask(__name__)
CORS(app)  # enable CORS for the app (no options passed)

# Configuration
UPLOAD_FOLDER = '/tmp/farsi_transcriber_uploads'
ALLOWED_EXTENSIONS = {
    # audio containers/codecs
    'mp3', 'wav', 'm4a', 'flac', 'ogg', 'aac', 'wma',
    # video containers
    'mp4', 'mkv', 'mov', 'webm', 'avi', 'flv', 'wmv',
}
MAX_FILE_SIZE = 500 * 1024 * 1024  # 500MB

# The upload directory must exist before any request tries to save into it
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
app.config.update(
    UPLOAD_FOLDER=UPLOAD_FOLDER,
    MAX_CONTENT_LENGTH=MAX_FILE_SIZE,
)

# Load Whisper model once at import time; on failure `model` is None and the
# endpoints that need it report the problem instead of crashing the process.
try:
    model = whisper.load_model('medium')
    print("✓ Whisper model loaded successfully")
except Exception as exc:
    print(f"✗ Error loading Whisper model: {exc}")
    model = None


def allowed_file(filename):
    """Tell whether *filename* carries one of the permitted extensions.

    The extension is the text after the last '.', compared
    case-insensitively against ALLOWED_EXTENSIONS. Names that contain
    no '.' at all are rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rpartition('.')[2]
    return extension.lower() in ALLOWED_EXTENSIONS


@app.route('/health', methods=['GET'])
def health():
    """Health check endpoint.

    Response (JSON):
    - status: always 'healthy'
    - model_loaded: whether the Whisper model was loaded at startup
    - device: device type the loaded model reports (e.g. 'cuda' or 'cpu'),
      'unknown' if the model exposes no device, or 'N/A' when no model is
      loaded
    """
    model_loaded = model is not None

    if not model_loaded:
        device_name = 'N/A'
    else:
        # Report where the model actually lives instead of assuming a GPU:
        # a model loaded on a CPU-only host must not be advertised as 'cuda'.
        device = getattr(model, 'device', None)
        if device is None:
            device_name = 'unknown'
        else:
            # Prefer the bare device type ('cuda'/'cpu'); fall back to str()
            # for device objects that have no `type` attribute.
            device_name = str(getattr(device, 'type', device))

    return jsonify({
        'status': 'healthy',
        'model_loaded': model_loaded,
        'device': device_name
    })


def _format_timestamp(seconds):
    """Render a time offset given in seconds as 'HH:MM:SS.mmm'."""
    # Convert to whole milliseconds first: taking `seconds % 1` on a float
    # can yield e.g. 0.000999... for 1.001 and silently drop a millisecond.
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"


@app.route('/transcribe', methods=['POST'])
def transcribe():
    """
    Transcribe audio/video file

    Request:
    - file: Audio/video file
    - language: Language code (default: 'fa' for Farsi)

    Response:
    - transcription results with segments and timestamps
      (status, filename, language, text, segments[start, end, text])

    Errors:
    - 400 when no file is supplied, the filename is empty, or the extension
      is not allowed
    - 500 when the model is not loaded or transcription fails
    """
    # Local import: only this handler needs it, for unique temp-file names.
    import uuid

    filepath = None
    try:
        # Check if model is loaded
        if model is None:
            return jsonify({'error': 'Whisper model not loaded'}), 500

        # Check if file is in request
        if 'file' not in request.files:
            return jsonify({'error': 'No file provided'}), 400

        file = request.files['file']

        if not file.filename:
            return jsonify({'error': 'No file selected'}), 400

        if not allowed_file(file.filename):
            return jsonify({'error': 'File type not allowed'}), 400

        # Sanitized name is only echoed back to the client.
        filename = secure_filename(file.filename)

        # Store the upload under a unique name. Different uploads can share
        # the same sanitized name, and concurrent requests would otherwise
        # overwrite or delete each other's file. Only the extension (already
        # validated by allowed_file) is kept from the client-supplied name.
        extension = file.filename.rsplit('.', 1)[1].lower()
        stored_name = f"{uuid.uuid4().hex}.{extension}"
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], stored_name)
        file.save(filepath)

        # Get language code from request (default: Farsi)
        language = request.form.get('language', 'fa')

        # Transcribe
        result = model.transcribe(filepath, language=language, verbose=False)

        # Format response
        segments = [
            {
                'start': _format_timestamp(segment['start']),
                'end': _format_timestamp(segment['end']),
                'text': segment['text'].strip(),
            }
            for segment in result.get('segments', [])
        ]

        return jsonify({
            'status': 'success',
            'filename': filename,
            'language': result.get('language', 'unknown'),
            'text': result.get('text', ''),
            'segments': segments
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500

    finally:
        # Clean up the uploaded file on success AND on failure. Removal is
        # best-effort: a file that is missing or cannot be deleted must not
        # turn a finished request into an error.
        if filepath is not None:
            try:
                os.remove(filepath)
            except OSError:
                pass


@app.route('/models', methods=['GET'])
def get_models():
    """Return the Whisper model names this API advertises and the one reported as current."""
    payload = {
        'available_models': ['tiny', 'base', 'small', 'medium', 'large'],
        'current_model': 'medium',
        'description': 'List of available Whisper models. Larger models are more accurate but slower.',
    }
    return jsonify(payload)


@app.route('/export', methods=['POST'])
def export():
    """
    Export transcription in specified format

    Request (JSON object):
    - transcription: Full transcription text
    - segments: Array of segments with timestamps
    - format: Export format (txt, srt, vtt, json); defaults to txt

    Response:
    - Exported file content (status, format, content, mime_type)

    Errors:
    - 400 when the body is not a JSON object, the format is unsupported,
      or the segments cannot be rendered as subtitles
    - 500 on any other unexpected failure
    """
    import json

    try:
        # silent=True yields None instead of raising on a missing or invalid
        # JSON body, so bad client input becomes a 400 rather than falling
        # into the generic 500 handler below.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        transcription = data.get('transcription', '')
        segments = data.get('segments', [])
        format_type = data.get('format', 'txt')

        # A non-string format (number, null, list, ...) can never match.
        if not isinstance(format_type, str):
            return jsonify({'error': 'Unsupported format'}), 400
        format_type = format_type.lower()

        if format_type == 'txt':
            content = transcription
            mime_type = 'text/plain'
        elif format_type in ('srt', 'vtt'):
            formatter = _format_srt if format_type == 'srt' else _format_vtt
            try:
                content = formatter(segments)
            except (KeyError, TypeError):
                # Missing keys or wrongly-typed segments are a client error.
                return jsonify({
                    'error': "Each segment must be an object with 'start', 'end' and 'text'"
                }), 400
            mime_type = 'text/plain'
        elif format_type == 'json':
            content = json.dumps(
                {'text': transcription, 'segments': segments},
                ensure_ascii=False,  # keep Farsi text readable, not \u-escaped
                indent=2,
            )
            mime_type = 'application/json'
        else:
            return jsonify({'error': 'Unsupported format'}), 400

        return jsonify({
            'status': 'success',
            'format': format_type,
            'content': content,
            'mime_type': mime_type
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500


def _format_srt(segments):
    """Format transcription as SRT subtitle format"""
    lines = []
    for i, segment in enumerate(segments, 1):
        lines.append(str(i))
        lines.append(f"{segment['start']} --> {segment['end']}")
        lines.append(segment['text'])
        lines.append('')
    return '\n'.join(lines)


def _format_vtt(segments):
    """Format transcription as WebVTT subtitle format"""
    lines = ['WEBVTT', '']
    for segment in segments:
        lines.append(f"{segment['start']} --> {segment['end']}")
        lines.append(segment['text'])
        lines.append('')
    return '\n'.join(lines)


if __name__ == '__main__':
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Since the server binds
    # to all interfaces, debug must be opt-in: set FLASK_DEBUG=1 to enable it.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)