diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7557712 --- /dev/null +++ b/.gitignore @@ -0,0 +1,70 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +venv/ +env/ +.env/ +.venv/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.tox/ + +# OS +.DS_Store +Thumbs.db + +# Model cache +models/cache/ +*.h5 +*.tflite + +# Media files (test inputs/outputs) +*.mp4 +*.avi +*.mkv +*.mov +*.wav +*.mp3 +*.srt +!tests/fixtures/*.srt + +# Logs +*.log +logs/ + +# Jupyter +.ipynb_checkpoints/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..825894c --- /dev/null +++ b/README.md @@ -0,0 +1,150 @@ +# Intelligent Closed Caption (CC) Suggestion Tool + +An AI-powered tool that intelligently identifies moments in a video where a Closed Caption (CC) annotation is genuinely necessary — such as when a non-speech audio event meaningfully affects the speakers or the scene — and suggests contextually relevant CC text, without over-captioning routine or low-impact sounds. 
+ +## Architecture + +``` +┌─────────────┐ ┌──────────────────┐ ┌──────────────────────┐ +│ Video File │───▶│ Audio Extractor │───▶│ Sound Event Detector │ +│ (input) │ │ (ffmpeg/moviepy) │ │ (YAMNet) │ +└──────┬───────┘ └──────────────────┘ └──────────┬───────────┘ + │ │ + │ ┌──────────────────┐ │ + └───────────▶│ Frame Extractor │ │ + │ (OpenCV) │ │ + └────────┬─────────┘ │ + │ │ + ┌────────▼─────────┐ │ + │ Reaction Detector│ │ + │ (MediaPipe) │ │ + └────────┬─────────┘ │ + │ │ + ┌────────▼─────────────────────────▼┐ + │ CC Decision Engine │ + │ Combines audio + visual signals │ + └────────────────┬───────────────────┘ + │ + ┌────────▼────────┐ + │ SRT Generator │ + └────────┬────────┘ + │ + ┌────────▼────────┐ + │ output.srt │ + └─────────────────┘ +``` + +## Features + +- **Sound Event Detection** — Automatically detects and classifies non-speech audio events (honking, explosions, laughter, music, alarms, applause, etc.) with confidence scores and timestamps using YAMNet. +- **Speaker Reaction Detection** — Analyzes video frames at detected event timestamps using MediaPipe to identify visible reactions (head turns, startled body language, facial expressions). +- **Intelligent CC Decisions** — Combines audio and visual signals to determine whether a CC annotation is truly warranted, avoiding over-captioning of ambient sounds. +- **SRT Output** — Generates standard SRT subtitle files with properly formatted timestamps and descriptive CC labels like `[honking]`, `[crowd cheering]`, `[gunshot]`. + +## Prerequisites + +- **Python 3.9+** +- **FFmpeg** — Must be installed and available on your system PATH + - Windows: `choco install ffmpeg` or download from [ffmpeg.org](https://ffmpeg.org/download.html) + - macOS: `brew install ffmpeg` + - Linux: `sudo apt install ffmpeg` + +## Installation + +1. **Clone the repository** + ```bash + git clone https://github.com/PlanetRead/Intelligent-cc-generation.git + cd Intelligent-cc-generation + ``` + +2. 
**Create a virtual environment** + ```bash + python -m venv venv + source venv/bin/activate # Linux/macOS + venv\Scripts\activate # Windows + ``` + +3. **Install dependencies** + ```bash + pip install -r requirements.txt + ``` + +4. **Install in development mode** (optional) + ```bash + pip install -e . + ``` + +## 🎯 Usage + +### Extract audio from a video file +```python +from src.utils.audio_extractor import AudioExtractor + +extractor = AudioExtractor() +audio_path = extractor.extract("input_video.mp4") +print(f"Audio saved to: {audio_path}") +``` + +### Full pipeline (coming soon) +```bash +python -m src.cli --input video.mp4 --output captions.srt +``` + +## Running Tests + +```bash +pytest tests/ -v +``` + +## Project Structure + +``` +Intelligent-cc-generation/ +├── src/ +│ ├── __init__.py +│ ├── cli.py # CLI entry point +│ ├── utils/ +│ │ ├── __init__.py +│ │ └── audio_extractor.py # Video → Audio extraction +│ ├── detectors/ +│ │ ├── __init__.py +│ │ ├── sound_event_detector.py # YAMNet-based audio analysis +│ │ └── reaction_detector.py # MediaPipe-based visual analysis +│ ├── models/ +│ │ ├── __init__.py +│ │ ├── event.py # SoundEvent dataclass +│ │ ├── reaction.py # ReactionEvent dataclass +│ │ └── cc_suggestion.py # CCSuggestion dataclass +│ ├── engine/ +│ │ ├── __init__.py +│ │ └── decision_engine.py # CC decision combiner +│ └── output/ +│ ├── __init__.py +│ └── srt_generator.py # SRT file writer +├── config/ +│ └── settings.py # Configuration defaults +├── tests/ +│ ├── __init__.py +│ ├── test_audio_extractor.py +│ └── fixtures/ +├── requirements.txt +├── setup.py +├── .gitignore +└── README.md +``` + +## Tech Stack + +| Component | Technology | +|-----------|-----------| +| Language | Python 3.9+ | +| Audio Event Detection | [YAMNet](https://tfhub.dev/google/yamnet/1) (TensorFlow Hub) | +| Frame Extraction | [OpenCV](https://opencv.org/) | +| Pose & Expression Analysis | [MediaPipe](https://mediapipe.dev/) | +| Audio Extraction | 
"""Configuration settings for the Intelligent CC Suggestion Tool."""

import os

# Directory-valued defaults below are anchored to the working directory
# that is current when this module is first imported.
_WORKING_DIR = os.getcwd()


# -----------------------------------------------------------------------------
# Audio extraction
# -----------------------------------------------------------------------------

# Sample rate (Hz) used for extracted audio.
AUDIO_SAMPLE_RATE = 16000

# Container/extension used for extracted audio files.
AUDIO_FORMAT = "wav"

# Where extracted audio files are written by default.
AUDIO_OUTPUT_DIR = os.path.join(_WORKING_DIR, "output", "audio")


# -----------------------------------------------------------------------------
# Sound event detection
# -----------------------------------------------------------------------------

# Sound events scoring below this confidence are ignored.
SOUND_CONFIDENCE_THRESHOLD = 0.3

# Length of one analysis window in seconds (YAMNet default patch size).
ANALYSIS_WINDOW_SIZE = 0.96

# Step between consecutive analysis windows in seconds.
ANALYSIS_HOP_LENGTH = 0.48

# Non-speech categories of interest, spelled as YAMNet class names.
# Full list: https://github.com/tensorflow/models/blob/master/research/audioset/yamnet/yamnet_class_map.csv
TARGET_SOUND_EVENTS = [
    "Gunshot, gunfire",
    "Explosion",
    "Glass",
    "Breaking",
    "Siren",
    "Car alarm",
    "Vehicle horn, car horn, honking",
    "Screaming",
    "Crying, sobbing",
    "Laughter",
    "Applause",
    "Cheering",
    "Crowd",
    "Dog",
    "Thunder",
    "Alarm",
    "Bell",
    "Door",
    "Knock",
    "Telephone",
    "Music",
    "Singing",
    "Drum",
    "Fire",
    "Water",
    "Rain",
    "Wind",
]


# -----------------------------------------------------------------------------
# Reaction detection
# -----------------------------------------------------------------------------

# How many frames are sampled around each event timestamp.
REACTION_FRAME_COUNT = 10

# Seconds before and after an event in which reactions are searched for.
REACTION_TIME_WINDOW = 1.5

# Reactions scoring below this confidence are not treated as significant.
REACTION_CONFIDENCE_THRESHOLD = 0.4

# Head rotation (degrees) from which a head turn counts as a reaction.
HEAD_TURN_THRESHOLD = 15.0

# Normalized pose change from which body language counts as startled.
POSE_CHANGE_THRESHOLD = 0.1


# -----------------------------------------------------------------------------
# CC decision engine
# -----------------------------------------------------------------------------

# Share of the final decision score contributed by audio confidence.
AUDIO_WEIGHT = 0.6

# Share of the final decision score contributed by visual confidence.
VISUAL_WEIGHT = 0.4

# Combined score required before a CC annotation is generated.
CC_DECISION_THRESHOLD = 0.5

# Minimum spacing (seconds) between consecutive CC annotations, so the
# viewer is not overwhelmed.
MIN_CC_GAP = 2.0


# -----------------------------------------------------------------------------
# Output
# -----------------------------------------------------------------------------

# Subtitle format produced by default.
OUTPUT_FORMAT = "srt"

# Where generated subtitle files are written by default.
OUTPUT_DIR = os.path.join(_WORKING_DIR, "output")

# Display time (seconds) used when the event duration does not decide it.
DEFAULT_CC_DURATION = 2.0
"""Setup configuration for the Intelligent CC Suggestion Tool."""

import re
from pathlib import Path

from setuptools import setup, find_packages

# Resolve data files relative to this script so the build does not depend
# on the directory it is invoked from.
_HERE = Path(__file__).resolve().parent

# A comment starts at "#" when it opens the line or follows whitespace.
_COMMENT_RE = re.compile(r"(^|\s+)#.*$")


def _read_requirements(path):
    """Return the requirement specifiers listed in a requirements file.

    Blank lines, full-line comments (indented or not), trailing inline
    comments and pip option lines (``-r``, ``-e``, ``--index-url`` ...)
    are skipped, since none of them are valid ``install_requires`` entries.

    Args:
        path: ``pathlib.Path`` of the requirements file.

    Returns:
        List of requirement strings in file order.
    """
    requirements = []
    for raw_line in path.read_text(encoding="utf-8").splitlines():
        line = _COMMENT_RE.sub("", raw_line).strip()
        if not line or line.startswith("-"):
            continue
        requirements.append(line)
    return requirements


long_description = (_HERE / "README.md").read_text(encoding="utf-8")
requirements = _read_requirements(_HERE / "requirements.txt")

setup(
    name="intelligent-cc-generation",
    version="0.1.0",
    author="Planet Read Contributors",
    description="AI-powered tool for intelligent closed caption suggestions",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/PlanetRead/Intelligent-cc-generation",
    # Keep the test suite out of the installed distribution; a top-level
    # "tests" package in site-packages collides with other projects.
    packages=find_packages(exclude=("tests", "tests.*")),
    python_requires=">=3.9",
    install_requires=requirements,
    entry_points={
        "console_scripts": [
            "cc-suggest=src.cli:main",
        ],
    },
    classifiers=[
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Topic :: Multimedia :: Video",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
)
"""Audio extraction utility for extracting audio tracks from video files.

This module provides the AudioExtractor class that handles extracting
audio from various video formats and saving it as WAV files suitable
for downstream audio analysis.
"""

import json
import os
import logging
import subprocess
import shutil
from pathlib import Path
from typing import Optional

from config.settings import AUDIO_SAMPLE_RATE, AUDIO_FORMAT, AUDIO_OUTPUT_DIR

logger = logging.getLogger(__name__)


class AudioExtractionError(Exception):
    """Raised when audio extraction from a video file fails."""


class AudioExtractor:
    """Extracts audio tracks from video files using FFmpeg.

    The extractor runs the ``ffmpeg`` executable found on PATH to write a
    mono, 16-bit PCM audio file at a configurable sample rate.

    Attributes:
        sample_rate: Target audio sample rate in Hz.
        output_dir: Directory where extracted audio files will be saved.
        audio_format: Output audio format (default: wav).

    Example:
        >>> extractor = AudioExtractor(sample_rate=16000)
        >>> audio_path = extractor.extract("input_video.mp4")
        >>> print(f"Audio saved to: {audio_path}")
    """

    SUPPORTED_VIDEO_FORMATS = {
        ".mp4", ".avi", ".mkv", ".mov", ".wmv", ".flv", ".webm", ".m4v",
    }

    def __init__(
        self,
        sample_rate: int = AUDIO_SAMPLE_RATE,
        output_dir: str = AUDIO_OUTPUT_DIR,
        audio_format: str = AUDIO_FORMAT,
    ):
        """Initialize the AudioExtractor.

        Args:
            sample_rate: Target audio sample rate in Hz.
            output_dir: Directory to save extracted audio files. It is
                created if it does not exist.
            audio_format: Output audio file format (used as the file
                extension of generated output paths).

        Raises:
            RuntimeError: If FFmpeg is not found on the system PATH.
        """
        self.sample_rate = sample_rate
        self.output_dir = output_dir
        self.audio_format = audio_format

        # Fail fast: without FFmpeg nothing else in this class can work.
        self._ffmpeg_path = self._find_ffmpeg()
        if self._ffmpeg_path is None:
            raise RuntimeError(
                "FFmpeg not found. Please install FFmpeg and ensure it is "
                "on your system PATH. Visit https://ffmpeg.org/download.html"
            )

        os.makedirs(self.output_dir, exist_ok=True)
        logger.info(
            "AudioExtractor initialized (sample_rate=%d, format=%s, output=%s)",
            self.sample_rate,
            self.audio_format,
            self.output_dir,
        )

    @staticmethod
    def _find_ffmpeg() -> Optional[str]:
        """Locate the FFmpeg executable on the system.

        Returns:
            Path to the FFmpeg executable, or None if not found.
        """
        ffmpeg_path = shutil.which("ffmpeg")
        if ffmpeg_path:
            logger.debug("Found FFmpeg at: %s", ffmpeg_path)
        return ffmpeg_path

    @staticmethod
    def _parse_number(value, cast, default):
        """Convert an ffprobe field with *cast*, or return *default*.

        ffprobe may omit a field (``None`` here) or report a placeholder
        such as ``"N/A"``; neither should abort the whole query.
        """
        try:
            return cast(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _discard_partial_output(output: Path, existed_before: bool) -> None:
        """Remove a file left behind by a failed FFmpeg run.

        Only files created by the failed run are removed; a file that was
        already present before the run is left alone.
        """
        if existed_before:
            return
        try:
            output.unlink(missing_ok=True)
        except OSError as exc:
            logger.warning(
                "Could not remove partial output %s: %s", output, exc
            )

    def _validate_input(self, video_path: str) -> Path:
        """Validate the input video file.

        Args:
            video_path: Path to the video file.

        Returns:
            Resolved Path object for the video file.

        Raises:
            FileNotFoundError: If the video file does not exist.
            ValueError: If the path is not a regular file or the file
                extension is not in SUPPORTED_VIDEO_FORMATS.
        """
        path = Path(video_path).resolve()

        if not path.exists():
            raise FileNotFoundError(f"Video file not found: {path}")

        if not path.is_file():
            raise ValueError(f"Path is not a file: {path}")

        suffix = path.suffix.lower()
        if suffix not in self.SUPPORTED_VIDEO_FORMATS:
            raise ValueError(
                f"Unsupported video format: '{suffix}'. "
                f"Supported formats: {', '.join(sorted(self.SUPPORTED_VIDEO_FORMATS))}"
            )

        return path

    def _build_output_path(
        self, video_path: Path, output_path: Optional[str] = None
    ) -> Path:
        """Build the output path for the extracted audio file.

        Args:
            video_path: Path to the source video file.
            output_path: Optional custom output path. If given, it is
                resolved and its parent directory is created. If None,
                the path is ``<output_dir>/<video stem>.<audio_format>``.

        Returns:
            Path for the output audio file.
        """
        if output_path:
            out = Path(output_path).resolve()
            os.makedirs(out.parent, exist_ok=True)
            return out

        filename = f"{video_path.stem}.{self.audio_format}"
        return Path(self.output_dir) / filename

    def extract(
        self,
        video_path: str,
        output_path: Optional[str] = None,
        overwrite: bool = False,
        timeout: Optional[float] = 300.0,
    ) -> str:
        """Extract audio from a video file.

        Runs FFmpeg to write the audio track of the given video as mono,
        16-bit PCM audio at the configured sample rate.

        Args:
            video_path: Path to the input video file.
            output_path: Optional custom output file path. If not provided,
                the audio is saved in the configured output directory
                with the same stem name as the video.
            overwrite: If True, overwrite an existing output file.
                Defaults to False.
            timeout: Maximum number of seconds FFmpeg may run; None
                disables the limit. Defaults to 300 (5 minutes).

        Returns:
            Path to the extracted audio file, as a string.

        Raises:
            FileNotFoundError: If the video file does not exist.
            ValueError: If the video format is not supported.
            AudioExtractionError: If FFmpeg fails, times out, disappears
                after initialization, or produces no output file.
            FileExistsError: If output file exists and overwrite is False.
        """
        video = self._validate_input(video_path)
        output = self._build_output_path(video, output_path)

        existed_before = output.exists()
        if existed_before and not overwrite:
            raise FileExistsError(
                f"Output file already exists: {output}. "
                "Use overwrite=True to replace it."
            )

        logger.info("Extracting audio from: %s", video)
        logger.info("Output: %s", output)

        cmd = [
            self._ffmpeg_path,
            "-i", str(video),               # Input file
            "-vn",                          # Disable video
            "-acodec", "pcm_s16le",         # 16-bit PCM encoding
            "-ar", str(self.sample_rate),   # Sample rate
            "-ac", "1",                     # Mono channel
            "-y" if overwrite else "-n",    # Overwrite flag
            str(output),
        ]

        # Only the subprocess call sits in the try block so the handlers
        # below cannot mask errors raised while interpreting the result.
        try:
            result = subprocess.run(
                cmd,
                # FFmpeg reads stdin for interactive commands; detach it so
                # the child cannot consume or block on the caller's stdin.
                stdin=subprocess.DEVNULL,
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as exc:
            self._discard_partial_output(output, existed_before)
            raise AudioExtractionError(
                f"Audio extraction timed out after {timeout:g} seconds "
                f"for: {video}"
            ) from exc
        except FileNotFoundError as exc:
            raise AudioExtractionError(
                "FFmpeg executable not found. It may have been removed "
                "after initialization."
            ) from exc

        if result.returncode != 0:
            self._discard_partial_output(output, existed_before)
            # FFmpeg prints the decisive message last; report that line.
            stderr_lines = (result.stderr or "").strip().splitlines()
            error_msg = stderr_lines[-1] if stderr_lines else "no error output"
            raise AudioExtractionError(
                f"FFmpeg failed with return code {result.returncode}: "
                f"{error_msg}"
            )

        if not output.exists():
            raise AudioExtractionError(
                f"Audio extraction completed but output file not found: {output}"
            )

        file_size = output.stat().st_size
        logger.info(
            "Audio extraction successful: %s (%.2f MB)",
            output,
            file_size / (1024 * 1024),
        )

        return str(output)

    def get_audio_info(self, audio_path: str) -> dict:
        """Get information about an audio file using FFprobe.

        Args:
            audio_path: Path to the audio file.

        Returns:
            Dictionary with the keys ``duration``, ``sample_rate``,
            ``channels``, ``codec`` and ``bit_rate`` describing the first
            audio stream. Numeric fields that FFprobe omits or reports in
            a non-numeric form are returned as 0. An empty dictionary is
            returned when FFprobe cannot be run, times out, exits with an
            error, prints invalid JSON, or reports no audio stream.

        Raises:
            FileNotFoundError: If the audio file does not exist.
            RuntimeError: If FFprobe is not found on the system PATH.
        """
        path = Path(audio_path).resolve()
        if not path.exists():
            raise FileNotFoundError(f"Audio file not found: {path}")

        ffprobe_path = shutil.which("ffprobe")
        if ffprobe_path is None:
            raise RuntimeError("FFprobe not found on system PATH.")

        cmd = [
            ffprobe_path,
            "-v", "quiet",
            "-print_format", "json",
            "-show_streams",
            "-select_streams", "a:0",
            str(path),
        ]

        try:
            result = subprocess.run(
                cmd,
                stdin=subprocess.DEVNULL,
                capture_output=True,
                text=True,
                timeout=30,
            )
        except (subprocess.TimeoutExpired, OSError) as e:
            logger.error("Failed to get audio info: %s", e)
            return {}

        if result.returncode != 0:
            logger.error(
                "Failed to get audio info: ffprobe exited with code %d for %s",
                result.returncode,
                path,
            )
            return {}

        try:
            data = json.loads(result.stdout)
        except (json.JSONDecodeError, TypeError) as e:
            logger.error("Failed to get audio info: %s", e)
            return {}

        streams = data.get("streams", [{}])
        if not streams:
            logger.error(
                "Failed to get audio info: no audio stream found in %s", path
            )
            return {}
        stream = streams[0]

        return {
            "duration": self._parse_number(stream.get("duration"), float, 0.0),
            "sample_rate": self._parse_number(stream.get("sample_rate"), int, 0),
            "channels": self._parse_number(stream.get("channels"), int, 0),
            "codec": stream.get("codec_name", "unknown"),
            "bit_rate": self._parse_number(stream.get("bit_rate"), int, 0),
        }
"""Unit tests for the AudioExtractor utility."""

import os
import subprocess
import pytest
from unittest.mock import patch, MagicMock
from pathlib import Path

from src.utils.audio_extractor import AudioExtractor, AudioExtractionError

WHICH_TARGET = "src.utils.audio_extractor.shutil.which"
RUN_TARGET = "src.utils.audio_extractor.subprocess.run"
FAKE_FFMPEG = "/usr/bin/ffmpeg"


@pytest.fixture
def ffmpeg_on_path():
    """Make ``shutil.which`` report an FFmpeg binary for the whole test."""
    with patch(WHICH_TARGET, return_value=FAKE_FFMPEG) as which:
        yield which


@pytest.fixture
def extractor(ffmpeg_on_path, tmp_path):
    """An AudioExtractor writing into the per-test temporary directory."""
    return AudioExtractor(output_dir=str(tmp_path))


@pytest.fixture
def mock_run():
    """Replace ``subprocess.run`` inside the extractor module."""
    with patch(RUN_TARGET) as run:
        yield run


@pytest.fixture
def input_file(tmp_path):
    """An empty placeholder video file named ``test.mp4``."""
    video = tmp_path / "test.mp4"
    video.touch()
    return video


class TestAudioExtractorInit:
    """Tests for AudioExtractor initialization."""

    def test_init_with_ffmpeg_available(self, extractor):
        """Should initialize successfully when FFmpeg is found."""
        assert extractor.sample_rate == 16000
        assert extractor.audio_format == "wav"

    def test_init_without_ffmpeg(self):
        """Should raise RuntimeError when FFmpeg is not found."""
        with patch(WHICH_TARGET, return_value=None):
            with pytest.raises(RuntimeError, match="FFmpeg not found"):
                AudioExtractor()

    def test_init_custom_sample_rate(self, ffmpeg_on_path, tmp_path):
        """Should accept custom sample rate."""
        custom = AudioExtractor(sample_rate=44100, output_dir=str(tmp_path))
        assert custom.sample_rate == 44100

    def test_init_creates_output_directory(self, ffmpeg_on_path, tmp_path):
        """Should create the output directory if it doesn't exist."""
        target_dir = str(tmp_path / "new_dir" / "audio")
        AudioExtractor(output_dir=target_dir)
        assert os.path.isdir(target_dir)


class TestAudioExtractorValidation:
    """Tests for input validation."""

    def test_validate_nonexistent_file(self, extractor):
        """Should raise FileNotFoundError for missing files."""
        with pytest.raises(FileNotFoundError, match="Video file not found"):
            extractor.extract("nonexistent_video.mp4")

    def test_validate_unsupported_format(self, extractor, tmp_path):
        """Should raise ValueError for unsupported video formats."""
        # A real file, but with an extension the extractor does not accept.
        unsupported = tmp_path / "test.xyz"
        unsupported.touch()

        with pytest.raises(ValueError, match="Unsupported video format"):
            extractor.extract(str(unsupported))

    def test_validate_directory_instead_of_file(self, extractor, tmp_path):
        """Should raise ValueError when path points to a directory."""
        directory = tmp_path / "somedir.mp4"
        directory.mkdir()

        with pytest.raises(ValueError, match="Path is not a file"):
            extractor.extract(str(directory))

    def test_validate_supported_formats(self, extractor, tmp_path):
        """Should accept all supported video formats."""
        for ext in AudioExtractor.SUPPORTED_VIDEO_FORMATS:
            candidate = tmp_path / f"test{ext}"
            candidate.touch()
            # Only validation is exercised here, not the extraction itself.
            assert extractor._validate_input(str(candidate)).exists()


class TestAudioExtractorExtract:
    """Tests for the extract method."""

    def test_file_exists_error_without_overwrite(
        self, extractor, input_file, tmp_path
    ):
        """Should raise FileExistsError when output exists and overwrite=False."""
        # Pre-create the file the extractor would otherwise write.
        (tmp_path / "test.wav").touch()

        with pytest.raises(FileExistsError, match="Output file already exists"):
            extractor.extract(str(input_file))

    def test_successful_extraction(
        self, extractor, mock_run, input_file, tmp_path
    ):
        """Should successfully extract audio when FFmpeg succeeds."""
        expected_output = tmp_path / "test.wav"

        def fake_ffmpeg(*args, **kwargs):
            # Stand in for FFmpeg: the output appears while the process runs.
            expected_output.write_bytes(b"\x00" * 1024)
            return MagicMock(returncode=0, stderr="")

        mock_run.side_effect = fake_ffmpeg

        result = extractor.extract(str(input_file))
        assert result == str(expected_output)
        assert mock_run.called

    def test_ffmpeg_failure(self, extractor, mock_run, input_file):
        """Should raise AudioExtractionError when FFmpeg fails."""
        mock_run.return_value = MagicMock(
            returncode=1, stderr="Error: Invalid data found"
        )

        with pytest.raises(AudioExtractionError, match="FFmpeg failed"):
            extractor.extract(str(input_file))

    def test_extraction_timeout(self, extractor, mock_run, input_file):
        """Should raise AudioExtractionError on timeout."""
        mock_run.side_effect = subprocess.TimeoutExpired(cmd="ffmpeg", timeout=300)

        with pytest.raises(AudioExtractionError, match="timed out"):
            extractor.extract(str(input_file))

    def test_custom_output_path(self, extractor, mock_run, input_file, tmp_path):
        """Should save to custom output path when specified."""
        custom_output = tmp_path / "custom" / "output.wav"

        def fake_ffmpeg(*args, **kwargs):
            # Stand in for FFmpeg writing to the caller-chosen location.
            custom_output.parent.mkdir(parents=True, exist_ok=True)
            custom_output.write_bytes(b"\x00" * 512)
            return MagicMock(returncode=0, stderr="")

        mock_run.side_effect = fake_ffmpeg

        result = extractor.extract(str(input_file), output_path=str(custom_output))
        assert result == str(custom_output)


class TestBuildOutputPath:
    """Tests for output path generation."""

    def test_default_output_path(self, extractor, tmp_path):
        """Should generate output path in the configured output directory."""
        source_video = Path("/some/path/my_video.mp4")

        built = extractor._build_output_path(source_video)

        assert built == Path(tmp_path) / "my_video.wav"

    def test_custom_output_path(self, extractor, tmp_path):
        """Should use custom output path when provided."""
        source_video = Path("/some/path/my_video.mp4")
        requested = str(tmp_path / "custom_name.wav")

        built = extractor._build_output_path(source_video, requested)

        assert built == Path(requested).resolve()