From 86b2a93dee6cbcc9a26b3dcf7143b859a94c0251 Mon Sep 17 00:00:00 2001
From: Claude
Date: Wed, 12 Nov 2025 05:09:15 +0000
Subject: [PATCH] feat: Initialize Farsi Transcriber application structure

- Create project directories (ui, models, utils)
- Add PyQt6 environment setup with requirements.txt
- Create main entry point (main.py)
- Add comprehensive README with setup instructions
- Add .gitignore for Python, PyTorch, and ML artifacts
- Phase 1 complete: project structure and environment ready
---
 farsi_transcriber/.gitignore         |  52 ++++++++++++
 farsi_transcriber/README.md          | 113 +++++++++++++++++++++++++++
 farsi_transcriber/__init__.py        |   8 ++
 farsi_transcriber/main.py            |  35 ++++++++
 farsi_transcriber/models/__init__.py |   1 +
 farsi_transcriber/requirements.txt   |   7 ++
 farsi_transcriber/ui/__init__.py     |   1 +
 farsi_transcriber/utils/__init__.py  |   1 +
 8 files changed, 218 insertions(+)
 create mode 100644 farsi_transcriber/.gitignore
 create mode 100644 farsi_transcriber/README.md
 create mode 100644 farsi_transcriber/__init__.py
 create mode 100644 farsi_transcriber/main.py
 create mode 100644 farsi_transcriber/models/__init__.py
 create mode 100644 farsi_transcriber/requirements.txt
 create mode 100644 farsi_transcriber/ui/__init__.py
 create mode 100644 farsi_transcriber/utils/__init__.py

diff --git a/farsi_transcriber/.gitignore b/farsi_transcriber/.gitignore
new file mode 100644
index 0000000..c051891
--- /dev/null
+++ b/farsi_transcriber/.gitignore
@@ -0,0 +1,52 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual Environment
+venv/
+ENV/
+env/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.DS_Store
+
+# PyTorch/ML Models
+*.pt
+*.pth
+models/downloaded/
+
+# Whisper models cache
+~/.cache/whisper/
+
+# Application outputs
+transcriptions/
+exports/
+*.log
+
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
diff --git a/farsi_transcriber/README.md b/farsi_transcriber/README.md
new file mode 100644
index 0000000..548301f
--- /dev/null
+++ b/farsi_transcriber/README.md
@@ -0,0 +1,113 @@
+# Farsi Transcriber
+
+A desktop application for transcribing Farsi audio and video files using OpenAI's Whisper model.
+
+## Features
+
+- 🎙️ Transcribe audio files (MP3, WAV, M4A, FLAC, OGG, etc.)
+- 🎬 Extract audio from video files (MP4, MKV, MOV, WebM, AVI, etc.)
+- 🇮🇷 High-accuracy Farsi transcription
+- ⏱️ Word-level timestamps
+- 📤 Export to multiple formats (TXT, SRT, JSON)
+- 💻 Clean PyQt6-based GUI
+
+## System Requirements
+
+- Python 3.8+
+- ffmpeg (for audio/video processing)
+- 8GB+ RAM recommended (for high-accuracy model)
+
+### Install ffmpeg
+
+**Ubuntu/Debian:**
+```bash
+sudo apt update && sudo apt install ffmpeg
+```
+
+**macOS (Homebrew):**
+```bash
+brew install ffmpeg
+```
+
+**Windows (Chocolatey):**
+```bash
+choco install ffmpeg
+```
+
+## Installation
+
+1. Clone the repository
+2. Create a virtual environment:
+```bash
+python3 -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+```
+
+3. Install dependencies:
+```bash
+pip install -r requirements.txt
+```
+
+4. Run the application:
+```bash
+python main.py
+```
+
+## Usage
+
+### GUI Application
+```bash
+python main.py
+```
+
+Then:
+1. Click "Select File" to choose an audio or video file
+2. Click "Transcribe" and wait for processing
+3. View results with timestamps
+4. Export to your preferred format
+
+### Command Line (Coming Soon)
+```bash
+python -m farsi_transcriber --input audio.mp3 --output transcription.srt
+```
+
+## Model Information
+
+This application uses OpenAI's Whisper model optimized for Farsi:
+- **Model**: medium or large (configurable)
+- **Accuracy**: Optimized for Persian language
+- **Processing**: Local processing (no cloud required)
+
+## Project Structure
+
+```
+farsi_transcriber/
+├── ui/                 # PyQt6 UI components
+├── models/             # Whisper model management
+├── utils/              # Utility functions
+├── main.py             # Application entry point
+├── requirements.txt    # Python dependencies
+└── README.md           # This file
+```
+
+## Development
+
+### Running Tests
+```bash
+pytest tests/
+```
+
+### Code Style
+```bash
+black .
+flake8 .
+isort .
+```
+
+## License
+
+MIT License - See LICENSE file for details
+
+## Contributing
+
+This is a personal project, but feel free to fork and modify for your needs!
diff --git a/farsi_transcriber/__init__.py b/farsi_transcriber/__init__.py
new file mode 100644
index 0000000..8e2e5fa
--- /dev/null
+++ b/farsi_transcriber/__init__.py
@@ -0,0 +1,8 @@
+"""
+Farsi Transcriber Application
+
+A desktop application for transcribing Farsi audio and video files using OpenAI's Whisper.
+"""
+
+__version__ = "0.1.0"
+__author__ = "Personal Project"
diff --git a/farsi_transcriber/main.py b/farsi_transcriber/main.py
new file mode 100644
index 0000000..65304db
--- /dev/null
+++ b/farsi_transcriber/main.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+"""
+Farsi Transcriber - Main Entry Point
+
+A PyQt6-based desktop application for transcribing Farsi audio and video files.
+"""
+
+import sys
+from PyQt6.QtWidgets import QApplication
+
+
+def main() -> None:
+    """Create the Qt application and run it until it exits.
+
+    The interpreter is always terminated through ``sys.exit``: with status 0
+    when no window exists yet, otherwise with the Qt event loop's status.
+    """
+    app = QApplication(sys.argv)
+
+    # TODO: Import and create main window
+    # from ui.main_window import MainWindow
+    # window = MainWindow()
+    # window.show()
+
+    print("Farsi Transcriber App initialized (setup phase)")
+    print("✓ PyQt6 environment ready")
+
+    # With no top-level window nothing can ever ask the event loop to quit,
+    # so only enter it once a window exists; otherwise exit straight away.
+    exit_code = app.exec() if app.topLevelWidgets() else 0
+    sys.exit(exit_code)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/farsi_transcriber/models/__init__.py b/farsi_transcriber/models/__init__.py
new file mode 100644
index 0000000..fd5a6a4
--- /dev/null
+++ b/farsi_transcriber/models/__init__.py
@@ -0,0 +1 @@
+"""Model management for Farsi Transcriber"""
diff --git a/farsi_transcriber/requirements.txt b/farsi_transcriber/requirements.txt
new file mode 100644
index 0000000..612f9b2
--- /dev/null
+++ b/farsi_transcriber/requirements.txt
@@ -0,0 +1,7 @@
+PyQt6==6.6.1
+PyQt6-Qt6==6.6.1
+PyQt6-sip==13.6.0
+torch>=1.10.1
+numpy
+openai-whisper
+tqdm
diff --git a/farsi_transcriber/ui/__init__.py b/farsi_transcriber/ui/__init__.py
new file mode 100644
index 0000000..435adac
--- /dev/null
+++ b/farsi_transcriber/ui/__init__.py
@@ -0,0 +1 @@
+"""UI components for Farsi Transcriber"""
diff --git a/farsi_transcriber/utils/__init__.py b/farsi_transcriber/utils/__init__.py
new file mode 100644
index 0000000..9c3f775
--- /dev/null
+++ b/farsi_transcriber/utils/__init__.py
@@ -0,0 +1 @@
+"""Utility functions for Farsi Transcriber"""