feat: Implement comprehensive export functionality

- Create TranscriptionExporter utility supporting TXT, SRT, VTT, JSON, TSV formats
- Implement proper timestamp formatting for subtitle formats
- Update GUI export dialog with all supported formats
- Integrate exporter with main window
- Add robust error handling for export operations
- Phase 4 complete: Full export capabilities ready
This commit is contained in:
Claude 2025-11-12 05:12:06 +00:00
parent 3fa194fa1f
commit dd57adab18
No known key found for this signature in database
2 changed files with 183 additions and 7 deletions

View File

@ -23,6 +23,7 @@ from PyQt6.QtWidgets import (
from PyQt6.QtGui import QFont from PyQt6.QtGui import QFont
from farsi_transcriber.models.whisper_transcriber import FarsiTranscriber from farsi_transcriber.models.whisper_transcriber import FarsiTranscriber
from farsi_transcriber.utils.export import TranscriptionExporter
class TranscriptionWorker(QThread): class TranscriptionWorker(QThread):
@ -239,24 +240,35 @@ class MainWindow(QMainWindow):
def on_export(self): def on_export(self):
"""Handle export button click""" """Handle export button click"""
if not hasattr(self, "last_result"): if not self.last_result:
QMessageBox.warning(self, "Warning", "No transcription to export.") QMessageBox.warning(self, "Warning", "No transcription to export.")
return return
file_path, file_format = QFileDialog.getSaveFileName( file_path, file_filter = QFileDialog.getSaveFileName(
self, self,
"Export Transcription", "Export Transcription",
"", "",
"Text Files (*.txt);;SRT Files (*.srt);;JSON Files (*.json)", "Text Files (*.txt);;SRT Subtitles (*.srt);;WebVTT Subtitles (*.vtt);;JSON (*.json);;TSV (*.tsv)",
) )
if file_path: if file_path:
try: try:
# TODO: Implement export logic in Phase 4 file_path = Path(file_path)
with open(file_path, "w", encoding="utf-8") as f:
f.write(self.results_text.toPlainText()) # Determine format from file extension
suffix = file_path.suffix.lower().lstrip(".")
if not suffix:
# Default to txt if no extension
suffix = "txt"
file_path = file_path.with_suffix(".txt")
# Export using the appropriate format
TranscriptionExporter.export(self.last_result, file_path, suffix)
QMessageBox.information( QMessageBox.information(
self, "Success", f"Results exported to {Path(file_path).name}" self,
"Success",
f"Transcription exported successfully to:\n{file_path.name}",
) )
except Exception as e: except Exception as e:
QMessageBox.critical( QMessageBox.critical(

View File

@ -0,0 +1,164 @@
"""
Export utilities for transcription results
Supports multiple export formats: TXT, SRT, JSON, TSV, VTT
"""
import json
from datetime import timedelta
from pathlib import Path
from typing import Dict, List
class TranscriptionExporter:
    """Export transcription results in various formats.

    Every exporter takes a ``result`` dictionary and an output path. The
    exporters read the following keys from ``result``:

    * ``"full_text"`` / ``"text"`` -- the whole transcript (TXT export).
    * ``"segments"`` -- a list of dictionaries with ``"start"``, ``"end"``
      (both in seconds) and ``"text"`` keys (SRT, VTT and TSV exports).

    Missing keys are treated as empty; segments whose text is blank are
    skipped. All files are written as UTF-8.
    """

    @staticmethod
    def export_txt(result: Dict, file_path: Path) -> None:
        """
        Export transcription as plain text file.

        Args:
            result: Transcription result dictionary
            file_path: Output file path
        """
        # Prefer "full_text", fall back to "text"; a missing or None value
        # yields an empty file instead of a TypeError in f.write().
        text = result.get("full_text") or result.get("text") or ""
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(text)

    @staticmethod
    def export_srt(result: Dict, file_path: Path) -> None:
        """
        Export transcription as SRT subtitle file.

        Cues are numbered consecutively from 1; segments with blank text are
        skipped and do not consume a number.

        Args:
            result: Transcription result dictionary
            file_path: Output file path
        """
        segments = result.get("segments") or []
        cue_number = 0
        with open(file_path, "w", encoding="utf-8") as f:
            for segment in segments:
                text = TranscriptionExporter._segment_text(segment)
                if not text:
                    continue
                # Count only the cues actually written so the sequence has
                # no gaps when blank segments are dropped.
                cue_number += 1
                start = TranscriptionExporter._format_srt_time(
                    segment.get("start", 0)
                )
                end = TranscriptionExporter._format_srt_time(
                    segment.get("end", 0)
                )
                f.write(f"{cue_number}\n{start} --> {end}\n{text}\n\n")

    @staticmethod
    def export_vtt(result: Dict, file_path: Path) -> None:
        """
        Export transcription as WebVTT subtitle file.

        Args:
            result: Transcription result dictionary
            file_path: Output file path
        """
        segments = result.get("segments") or []
        with open(file_path, "w", encoding="utf-8") as f:
            f.write("WEBVTT\n\n")
            for segment in segments:
                text = TranscriptionExporter._segment_text(segment)
                if not text:
                    continue
                start = TranscriptionExporter._format_vtt_time(
                    segment.get("start", 0)
                )
                end = TranscriptionExporter._format_vtt_time(
                    segment.get("end", 0)
                )
                f.write(f"{start} --> {end}\n{text}\n\n")

    @staticmethod
    def export_json(result: Dict, file_path: Path) -> None:
        """
        Export transcription as JSON file.

        The dictionary is dumped as-is, with non-ASCII characters kept
        verbatim (``ensure_ascii=False``) and a two-space indent.

        Args:
            result: Transcription result dictionary
            file_path: Output file path
        """
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(result, f, ensure_ascii=False, indent=2)

    @staticmethod
    def export_tsv(result: Dict, file_path: Path) -> None:
        """
        Export transcription as TSV (tab-separated values) file.

        Columns are ``Index``, ``Start``, ``End``, ``Duration`` and ``Text``.
        ``Index`` is the segment's 1-based position in ``result["segments"]``,
        so skipped blank segments leave gaps in that column. Times are written
        in seconds with two decimals.

        Args:
            result: Transcription result dictionary
            file_path: Output file path
        """
        segments = result.get("segments") or []
        with open(file_path, "w", encoding="utf-8") as f:
            # Write header
            f.write("Index\tStart\tEnd\tDuration\tText\n")
            for i, segment in enumerate(segments, 1):
                text = TranscriptionExporter._segment_text(segment)
                if not text:
                    continue
                # A tab or line break inside the text would split the row
                # into extra columns/records, so flatten them to spaces.
                for separator in ("\r\n", "\t", "\n", "\r"):
                    text = text.replace(separator, " ")
                start = segment.get("start", 0)
                end = segment.get("end", 0)
                duration = end - start
                f.write(
                    f"{i}\t{start:.2f}\t{end:.2f}\t{duration:.2f}\t{text}\n"
                )

    @staticmethod
    def export(
        result: Dict, file_path: Path, format_type: str = "txt"
    ) -> None:
        """
        Export transcription in specified format.

        Args:
            result: Transcription result dictionary
            file_path: Output file path
            format_type: Export format ('txt', 'srt', 'vtt', 'json', 'tsv'),
                matched case-insensitively

        Raises:
            ValueError: If format is not supported
        """
        format_type = format_type.lower()

        exporters = {
            "txt": TranscriptionExporter.export_txt,
            "srt": TranscriptionExporter.export_srt,
            "vtt": TranscriptionExporter.export_vtt,
            "json": TranscriptionExporter.export_json,
            "tsv": TranscriptionExporter.export_tsv,
        }

        if format_type not in exporters:
            raise ValueError(
                f"Unsupported format: {format_type}. "
                f"Supported formats: {list(exporters.keys())}"
            )

        exporters[format_type](result, file_path)

    @staticmethod
    def _segment_text(segment: Dict) -> str:
        """Return the segment's text stripped of surrounding whitespace.

        A missing or ``None`` ``"text"`` value is treated as an empty string.
        """
        return (segment.get("text") or "").strip()

    @staticmethod
    def _format_timestamp(seconds: float, separator: str) -> str:
        """Format ``seconds`` as ``HH:MM:SS<separator>mmm``.

        The value is rounded once to whole milliseconds and then split with
        integer arithmetic. Deriving the millisecond field from
        ``int((seconds % 1) * 1000)`` truncates binary floating-point error
        (1.001 would print as ``...,000``) and can disagree with a separately
        rounded seconds field. Negative inputs are clamped to zero because
        subtitle timestamps cannot be negative.
        """
        total_ms = max(0, round(float(seconds) * 1000))
        total_secs, milliseconds = divmod(total_ms, 1000)
        hours, remainder = divmod(total_secs, 3600)
        minutes, secs = divmod(remainder, 60)
        return (
            f"{hours:02d}:{minutes:02d}:{secs:02d}"
            f"{separator}{milliseconds:03d}"
        )

    @staticmethod
    def _format_srt_time(seconds: float) -> str:
        """Format time for SRT format (HH:MM:SS,mmm)"""
        return TranscriptionExporter._format_timestamp(seconds, ",")

    @staticmethod
    def _format_vtt_time(seconds: float) -> str:
        """Format time for VTT format (HH:MM:SS.mmm)"""
        return TranscriptionExporter._format_timestamp(seconds, ".")