From dd57adab1865ef56523b34139ee5de90b0326813 Mon Sep 17 00:00:00 2001
From: Claude
Date: Wed, 12 Nov 2025 05:12:06 +0000
Subject: [PATCH] feat: Implement comprehensive export functionality

- Create TranscriptionExporter utility supporting TXT, SRT, VTT, JSON, TSV formats
- Implement proper timestamp formatting for subtitle formats
- Update GUI export dialog with all supported formats
- Integrate exporter with main window
- Add robust error handling for export operations
- Phase 4 complete: Full export capabilities ready
---
 farsi_transcriber/ui/main_window.py |  28 +++--
 farsi_transcriber/utils/export.py   | 184 ++++++++++++++++++++++++++++
 2 files changed, 205 insertions(+), 7 deletions(-)
 create mode 100644 farsi_transcriber/utils/export.py

diff --git a/farsi_transcriber/ui/main_window.py b/farsi_transcriber/ui/main_window.py
index e8ad3cd..657e866 100644
--- a/farsi_transcriber/ui/main_window.py
+++ b/farsi_transcriber/ui/main_window.py
@@ -23,6 +23,7 @@ from PyQt6.QtWidgets import (
 from PyQt6.QtGui import QFont
 
 from farsi_transcriber.models.whisper_transcriber import FarsiTranscriber
+from farsi_transcriber.utils.export import TranscriptionExporter
 
 
 class TranscriptionWorker(QThread):
@@ -239,24 +240,37 @@ class MainWindow(QMainWindow):
 
     def on_export(self):
         """Handle export button click"""
-        if not hasattr(self, "last_result"):
+        # getattr tolerates last_result being unset as well as empty
+        if not getattr(self, "last_result", None):
             QMessageBox.warning(self, "Warning", "No transcription to export.")
             return
 
-        file_path, file_format = QFileDialog.getSaveFileName(
+        file_path, file_filter = QFileDialog.getSaveFileName(
             self,
             "Export Transcription",
             "",
-            "Text Files (*.txt);;SRT Files (*.srt);;JSON Files (*.json)",
+            "Text Files (*.txt);;SRT Subtitles (*.srt);;WebVTT Subtitles (*.vtt);;JSON (*.json);;TSV (*.tsv)",
         )
 
         if file_path:
             try:
-                # TODO: Implement export logic in Phase 4
-                with open(file_path, "w", encoding="utf-8") as f:
-                    f.write(self.results_text.toPlainText())
+                file_path = Path(file_path)
+
+                # Determine format from file extension
+                suffix = file_path.suffix.lower().lstrip(".")
+                if not suffix:
+                    # No extension typed: use the one from the selected
+                    # filter, e.g. "SRT Subtitles (*.srt)" -> "srt"
+                    suffix = file_filter.rpartition("*.")[2].rstrip(")") or "txt"
+                    file_path = file_path.with_suffix(f".{suffix}")
+
+                # Export using the appropriate format
+                TranscriptionExporter.export(self.last_result, file_path, suffix)
+
                 QMessageBox.information(
-                    self, "Success", f"Results exported to {Path(file_path).name}"
+                    self,
+                    "Success",
+                    f"Transcription exported successfully to:\n{file_path.name}",
                 )
             except Exception as e:
                 QMessageBox.critical(
diff --git a/farsi_transcriber/utils/export.py b/farsi_transcriber/utils/export.py
new file mode 100644
index 0000000..ab3a3c8
--- /dev/null
+++ b/farsi_transcriber/utils/export.py
@@ -0,0 +1,184 @@
+"""
+Export utilities for transcription results
+
+Supports multiple export formats: TXT, SRT, JSON, TSV, VTT
+"""
+
+import json
+from datetime import timedelta
+from pathlib import Path
+from typing import Dict, List, Tuple
+
+
+class TranscriptionExporter:
+    """Export transcription results in various formats"""
+
+    @staticmethod
+    def export_txt(result: Dict, file_path: Path) -> None:
+        """
+        Export transcription as plain text file.
+
+        Args:
+            result: Transcription result dictionary
+            file_path: Output file path
+        """
+        text = result.get("full_text", "") or result.get("text", "")
+
+        with open(file_path, "w", encoding="utf-8") as f:
+            f.write(text)
+
+    @staticmethod
+    def export_srt(result: Dict, file_path: Path) -> None:
+        """
+        Export transcription as SRT subtitle file.
+
+        Segments whose text is empty are skipped; the remaining cues are
+        numbered consecutively starting at 1.
+
+        Args:
+            result: Transcription result dictionary
+            file_path: Output file path
+        """
+        segments = result.get("segments", [])
+
+        with open(file_path, "w", encoding="utf-8") as f:
+            cue_number = 0
+            for segment in segments:
+                text = segment.get("text", "").strip()
+                if not text:
+                    continue
+
+                # Count only written cues so numbering has no gaps
+                cue_number += 1
+                start = TranscriptionExporter._format_srt_time(segment.get("start", 0))
+                end = TranscriptionExporter._format_srt_time(segment.get("end", 0))
+
+                f.write(f"{cue_number}\n")
+                f.write(f"{start} --> {end}\n")
+                f.write(f"{text}\n\n")
+
+    @staticmethod
+    def export_vtt(result: Dict, file_path: Path) -> None:
+        """
+        Export transcription as WebVTT subtitle file.
+
+        Args:
+            result: Transcription result dictionary
+            file_path: Output file path
+        """
+        segments = result.get("segments", [])
+
+        with open(file_path, "w", encoding="utf-8") as f:
+            f.write("WEBVTT\n\n")
+
+            for segment in segments:
+                start = TranscriptionExporter._format_vtt_time(segment.get("start", 0))
+                end = TranscriptionExporter._format_vtt_time(segment.get("end", 0))
+                text = segment.get("text", "").strip()
+
+                if text:
+                    f.write(f"{start} --> {end}\n")
+                    f.write(f"{text}\n\n")
+
+    @staticmethod
+    def export_json(result: Dict, file_path: Path) -> None:
+        """
+        Export transcription as JSON file.
+
+        Args:
+            result: Transcription result dictionary
+            file_path: Output file path
+        """
+        with open(file_path, "w", encoding="utf-8") as f:
+            json.dump(result, f, ensure_ascii=False, indent=2)
+
+    @staticmethod
+    def export_tsv(result: Dict, file_path: Path) -> None:
+        """
+        Export transcription as TSV (tab-separated values) file.
+
+        Args:
+            result: Transcription result dictionary
+            file_path: Output file path
+        """
+        segments = result.get("segments", [])
+
+        with open(file_path, "w", encoding="utf-8") as f:
+            # Write header
+            f.write("Index\tStart\tEnd\tDuration\tText\n")
+
+            for i, segment in enumerate(segments, 1):
+                start = segment.get("start", 0)
+                end = segment.get("end", 0)
+                duration = end - start
+                text = segment.get("text", "").strip()
+                # Tabs or line breaks inside the text would split the row
+                for separator in ("\t", "\r", "\n"):
+                    text = text.replace(separator, " ")
+
+                if text:
+                    f.write(
+                        f"{i}\t{start:.2f}\t{end:.2f}\t{duration:.2f}\t{text}\n"
+                    )
+
+    @staticmethod
+    def export(
+        result: Dict, file_path: Path, format_type: str = "txt"
+    ) -> None:
+        """
+        Export transcription in specified format.
+
+        Args:
+            result: Transcription result dictionary
+            file_path: Output file path
+            format_type: Export format ('txt', 'srt', 'vtt', 'json', 'tsv');
+                case-insensitive, a leading dot ('.srt') is accepted
+
+        Raises:
+            ValueError: If format is not supported
+        """
+        format_type = format_type.lower().lstrip(".")
+
+        exporters = {
+            "txt": TranscriptionExporter.export_txt,
+            "srt": TranscriptionExporter.export_srt,
+            "vtt": TranscriptionExporter.export_vtt,
+            "json": TranscriptionExporter.export_json,
+            "tsv": TranscriptionExporter.export_tsv,
+        }
+
+        if format_type not in exporters:
+            raise ValueError(
+                f"Unsupported format: {format_type}. "
+                f"Supported formats: {list(exporters.keys())}"
+            )
+
+        exporters[format_type](result, file_path)
+
+    @staticmethod
+    def _split_time(seconds: float) -> Tuple[int, int, int, int]:
+        """Split seconds into (hours, minutes, seconds, milliseconds)"""
+        # timedelta stores whole microseconds, which removes binary float
+        # noise (2.3 % 1 == 0.2999...) before truncating to milliseconds.
+        # Negative input is clamped to zero.
+        elapsed = max(timedelta(seconds=seconds), timedelta(0))
+        total_ms = elapsed // timedelta(milliseconds=1)
+        hours, remainder = divmod(total_ms, 3_600_000)
+        minutes, remainder = divmod(remainder, 60_000)
+        secs, milliseconds = divmod(remainder, 1000)
+
+        return hours, minutes, secs, milliseconds
+
+    @staticmethod
+    def _format_srt_time(seconds: float) -> str:
+        """Format time for SRT format (HH:MM:SS,mmm)"""
+        hours, minutes, secs, milliseconds = TranscriptionExporter._split_time(seconds)
+
+        return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
+
+    @staticmethod
+    def _format_vtt_time(seconds: float) -> str:
+        """Format time for VTT format (HH:MM:SS.mmm)"""
+        hours, minutes, secs, milliseconds = TranscriptionExporter._split_time(seconds)
+
+        return f"{hours:02d}:{minutes:02d}:{secs:02d}.{milliseconds:03d}"