# Changelog (from commit message):
# - Added new TimelineEditor and VideoStudio components
# - Improved backend transcriber and video_processor services
# - Refactored frontend HomePage and updated styles
import asyncio
import os
import subprocess
from typing import Optional, Tuple

from app.config import settings
|
|
|
|
|
|
async def process_video(
|
|
input_path: str,
|
|
output_path: str,
|
|
subtitle_path: Optional[str] = None,
|
|
bgm_path: Optional[str] = None,
|
|
bgm_volume: float = 0.3,
|
|
keep_original_audio: bool = False,
|
|
intro_text: Optional[str] = None,
|
|
intro_duration: float = 0.7,
|
|
intro_font_size: int = 100,
|
|
intro_position: str = "center", # top, center, bottom
|
|
) -> Tuple[bool, str]:
|
|
"""
|
|
Process video: remove audio, add subtitles, add BGM, add intro text.
|
|
|
|
Args:
|
|
input_path: Path to input video
|
|
output_path: Path for output video
|
|
subtitle_path: Path to ASS/SRT subtitle file
|
|
bgm_path: Path to BGM audio file
|
|
bgm_volume: Volume level for BGM (0.0 - 1.0)
|
|
keep_original_audio: Whether to keep original audio
|
|
intro_text: Text to display at the beginning of video (YouTube Shorts thumbnail)
|
|
intro_duration: How long to display intro text (seconds)
|
|
intro_font_size: Font size for intro text (100-120 recommended)
|
|
|
|
Returns:
|
|
Tuple of (success, message)
|
|
"""
|
|
if not os.path.exists(input_path):
|
|
return False, f"Input video not found: {input_path}"
|
|
|
|
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
|
|
|
# Build FFmpeg command
|
|
cmd = ["ffmpeg", "-y"] # -y to overwrite
|
|
|
|
# Input video
|
|
cmd.extend(["-i", input_path])
|
|
|
|
# Input BGM if provided (stream_loop must come BEFORE -i)
|
|
if bgm_path and os.path.exists(bgm_path):
|
|
cmd.extend(["-stream_loop", "-1"]) # Loop BGM infinitely
|
|
cmd.extend(["-i", bgm_path])
|
|
|
|
# Build filter complex
|
|
filter_parts = []
|
|
audio_parts = []
|
|
|
|
# Audio handling
|
|
if keep_original_audio and bgm_path and os.path.exists(bgm_path):
|
|
# Mix original audio with BGM
|
|
filter_parts.append(f"[0:a]volume=1.0[original]")
|
|
filter_parts.append(f"[1:a]volume={bgm_volume}[bgm]")
|
|
filter_parts.append(f"[original][bgm]amix=inputs=2:duration=shortest[audio]")
|
|
audio_output = "[audio]"
|
|
elif bgm_path and os.path.exists(bgm_path):
|
|
# BGM only (no original audio)
|
|
filter_parts.append(f"[1:a]volume={bgm_volume}[audio]")
|
|
audio_output = "[audio]"
|
|
elif keep_original_audio:
|
|
# Original audio only
|
|
audio_output = "0:a"
|
|
else:
|
|
# No audio
|
|
audio_output = None
|
|
|
|
# Build video filter chain
|
|
video_filters = []
|
|
|
|
# 1. Add freeze frame at the beginning if intro text is provided
|
|
# tpad adds frozen frames at start using clone mode (copies first frame)
|
|
if intro_text and intro_duration > 0:
|
|
# Clone the first frame for intro_duration seconds
|
|
video_filters.append(f"tpad=start_duration={intro_duration}:start_mode=clone")
|
|
|
|
# 2. Add subtitle overlay if provided
|
|
if subtitle_path and os.path.exists(subtitle_path):
|
|
escaped_path = subtitle_path.replace("\\", "/").replace(":", "\\:").replace("'", "\\'")
|
|
video_filters.append(f"ass='{escaped_path}'")
|
|
|
|
# 3. Add intro text overlay if provided (shown during frozen frame portion)
|
|
if intro_text:
|
|
# Find a suitable font - try common Korean fonts
|
|
font_options = [
|
|
"/System/Library/Fonts/Supplemental/AppleGothic.ttf", # macOS Korean
|
|
"/System/Library/Fonts/AppleSDGothicNeo.ttc", # macOS Korean
|
|
"/usr/share/fonts/truetype/nanum/NanumGothicBold.ttf", # Linux Korean
|
|
"/usr/share/fonts/truetype/korean/Pretendard-Bold.otf", # Docker Korean
|
|
"/usr/share/fonts/opentype/noto/NotoSansCJK-Bold.ttc", # Linux CJK
|
|
]
|
|
|
|
font_file = None
|
|
for font in font_options:
|
|
if os.path.exists(font):
|
|
font_file = font.replace(":", "\\:")
|
|
break
|
|
|
|
# Calculate font size based on text length to prevent overflow
|
|
# Shorts video is typically 720px width
|
|
# Korean characters are nearly square (width ≈ height), so char_width_ratio ≈ 1.0
|
|
video_width = 720 # Default Shorts width
|
|
box_padding = 40 # boxborderw=20 on each side
|
|
max_width_ratio = 0.75 # Leave 25% margin for safety
|
|
char_width_ratio = 1.0 # Korean characters are nearly square
|
|
available_width = (video_width * max_width_ratio) - box_padding
|
|
|
|
# Split text into 2 lines if too long (more than 10 chars or font would be too small)
|
|
text_len = len(intro_text)
|
|
single_line_font = int(available_width / (text_len * char_width_ratio))
|
|
|
|
# Use 2 lines if single line font would be less than 50px
|
|
if single_line_font < 50 and text_len > 6:
|
|
# Find best split point (prefer space near middle)
|
|
mid = len(intro_text) // 2
|
|
split_pos = None
|
|
|
|
# Search for space within 5 chars of middle
|
|
for offset in range(6):
|
|
if mid + offset < len(intro_text) and intro_text[mid + offset] == ' ':
|
|
split_pos = mid + offset
|
|
break
|
|
if mid - offset >= 0 and intro_text[mid - offset] == ' ':
|
|
split_pos = mid - offset
|
|
break
|
|
|
|
# If no space found, split at middle
|
|
if split_pos is None:
|
|
split_pos = mid
|
|
|
|
line1 = intro_text[:split_pos].strip()
|
|
line2 = intro_text[split_pos:].strip()
|
|
display_text = f"{line1}\\n{line2}"
|
|
|
|
# Calculate font size based on longer line
|
|
max_line_len = max(len(line1), len(line2))
|
|
calculated_max_font = int(available_width / (max_line_len * char_width_ratio))
|
|
print(f"[Intro] Split into 2 lines: '{line1}' / '{line2}' (max {max_line_len} chars)")
|
|
else:
|
|
display_text = intro_text
|
|
calculated_max_font = single_line_font
|
|
print(f"[Intro] Single line: '{intro_text}' ({text_len} chars)")
|
|
|
|
adjusted_font_size = min(intro_font_size, calculated_max_font)
|
|
adjusted_font_size = max(36, adjusted_font_size) # Minimum 36px font size
|
|
print(f"[Intro] Requested font: {intro_font_size}px, Adjusted: {adjusted_font_size}px")
|
|
|
|
# Fade out effect timing (fade starts 0.2s before end)
|
|
fade_out_start = max(0.1, intro_duration - 0.2)
|
|
alpha_expr = f"if(gt(t,{fade_out_start}),(({intro_duration}-t)/0.2),1)"
|
|
|
|
escaped_text = display_text.replace("'", "\\'").replace(":", "\\:")
|
|
|
|
# Calculate vertical position based on intro_position
|
|
if intro_position == "top":
|
|
y_expr = "h*0.15" # 15% from top
|
|
elif intro_position == "bottom":
|
|
y_expr = "h*0.80-text_h" # 80% from top (above subtitle area)
|
|
else: # center
|
|
y_expr = "(h-text_h)/2" # Center vertically
|
|
|
|
# Draw text on screen during freeze frame
|
|
drawtext_parts = [
|
|
f"text='{escaped_text}'",
|
|
f"fontsize={adjusted_font_size}",
|
|
"fontcolor=white",
|
|
"x=(w-text_w)/2", # Center horizontally
|
|
f"y={y_expr}", # Vertical position based on intro_position
|
|
f"enable='lt(t,{intro_duration})'",
|
|
"borderw=4",
|
|
"bordercolor=black",
|
|
"box=1",
|
|
"boxcolor=black@0.7",
|
|
"boxborderw=20",
|
|
f"alpha='{alpha_expr}'",
|
|
"line_spacing=10", # Add spacing between lines
|
|
]
|
|
|
|
if font_file:
|
|
drawtext_parts.insert(1, f"fontfile='{font_file}'")
|
|
|
|
video_filters.append(f"drawtext={':'.join(drawtext_parts)}")
|
|
|
|
# Combine video filters
|
|
video_filter_str = ",".join(video_filters) if video_filters else None
|
|
|
|
# Construct FFmpeg command
|
|
if filter_parts or video_filter_str:
|
|
if filter_parts and video_filter_str:
|
|
full_filter = ";".join(filter_parts) + f";[0:v]{video_filter_str}[vout]"
|
|
cmd.extend(["-filter_complex", full_filter])
|
|
cmd.extend(["-map", "[vout]"])
|
|
if audio_output and audio_output.startswith("["):
|
|
cmd.extend(["-map", audio_output])
|
|
elif audio_output:
|
|
cmd.extend(["-map", audio_output])
|
|
elif video_filter_str:
|
|
cmd.extend(["-vf", video_filter_str])
|
|
if bgm_path and os.path.exists(bgm_path):
|
|
cmd.extend(["-filter_complex", f"[1:a]volume={bgm_volume}[audio]"])
|
|
cmd.extend(["-map", "0:v", "-map", "[audio]"])
|
|
elif not keep_original_audio:
|
|
cmd.extend(["-an"]) # No audio
|
|
elif filter_parts:
|
|
cmd.extend(["-filter_complex", ";".join(filter_parts)])
|
|
cmd.extend(["-map", "0:v"])
|
|
if audio_output and audio_output.startswith("["):
|
|
cmd.extend(["-map", audio_output])
|
|
else:
|
|
if not keep_original_audio:
|
|
cmd.extend(["-an"])
|
|
|
|
# Output settings
|
|
cmd.extend([
|
|
"-c:v", "libx264",
|
|
"-preset", "medium",
|
|
"-crf", "23",
|
|
"-c:a", "aac",
|
|
"-b:a", "128k",
|
|
"-shortest",
|
|
output_path
|
|
])
|
|
|
|
try:
|
|
# Run FFmpeg in thread pool to avoid blocking the event loop
|
|
result = await asyncio.to_thread(
|
|
subprocess.run,
|
|
cmd,
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=600, # 10 minute timeout
|
|
)
|
|
|
|
if result.returncode != 0:
|
|
error_msg = result.stderr[-500:] if result.stderr else "Unknown error"
|
|
return False, f"FFmpeg error: {error_msg}"
|
|
|
|
if os.path.exists(output_path):
|
|
return True, "Video processing complete"
|
|
else:
|
|
return False, "Output file not created"
|
|
|
|
except subprocess.TimeoutExpired:
|
|
return False, "Processing timed out"
|
|
except Exception as e:
|
|
return False, f"Processing error: {str(e)}"
|
|
|
|
|
|
async def get_video_duration(video_path: str) -> Optional[float]:
    """Get video duration in seconds, or None if it cannot be determined."""
    cmd = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        video_path
    ]

    try:
        # Off-load the blocking subprocess call to a worker thread so the
        # event loop stays responsive (was a direct subprocess.run before,
        # unlike the other async helpers in this module).
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=30,
        )
        if result.returncode == 0:
            return float(result.stdout.strip())
    except Exception:
        # Probe failures (missing binary, timeout, unparsable output) all
        # fall through to None.
        pass

    return None
|
|
|
|
|
|
async def get_video_info(video_path: str) -> Optional[dict]:
    """Probe a video and return its duration and resolution.

    Returns a dict that may contain "duration" (float, seconds), "width"
    and "height" (ints), or None when probing fails or yields nothing.
    """
    import json

    probe_cmd = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=width,height,duration:format=duration",
        "-of", "json",
        video_path
    ]

    try:
        proc = await asyncio.to_thread(
            subprocess.run,
            probe_cmd,
            capture_output=True,
            text=True,
            timeout=30,
        )
        if proc.returncode != 0:
            return None

        parsed = json.loads(proc.stdout)
        details = {}

        # Container-level duration is preferred (more reliable than the
        # per-stream value).
        fmt = parsed.get("format", {})
        if "duration" in fmt:
            details["duration"] = float(fmt["duration"])

        # Resolution comes from the first (selected) video stream.
        streams = parsed.get("streams") or []
        if streams:
            details["width"] = streams[0].get("width")
            details["height"] = streams[0].get("height")

        return details if details else None
    except Exception:
        return None
|
|
|
|
|
|
async def trim_video(
    input_path: str,
    output_path: str,
    start_time: float,
    end_time: float,
    exclude_regions: Optional[list] = None,
) -> Tuple[bool, str]:
    """
    Trim video to specified time range, optionally excluding middle sections.

    Args:
        input_path: Path to input video
        output_path: Path for output video
        start_time: Start time in seconds (clamped to >= 0)
        end_time: End time in seconds (clamped to the clip duration)
        exclude_regions: List of dicts with 'start' and 'end' keys for
            sections to remove from within the trimmed range

    Returns:
        Tuple of (success, message)
    """
    if not os.path.exists(input_path):
        return False, f"Input video not found: {input_path}"

    # Validate time range against the actual clip length.
    duration = await get_video_duration(input_path)
    if duration is None:
        return False, "Could not get video duration"

    if start_time < 0:
        start_time = 0
    if end_time > duration:
        end_time = duration
    if start_time >= end_time:
        return False, f"Invalid time range: start ({start_time}) >= end ({end_time})"

    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # With exclude regions, delegate to the segment-concat implementation.
    if exclude_regions and len(exclude_regions) > 0:
        return await _trim_with_exclude_regions(
            input_path, output_path, start_time, end_time, exclude_regions
        )

    # Simple trim without exclude regions.
    trim_duration = end_time - start_time

    print(f"[Trim] Input: {input_path}")
    print(f"[Trim] Original duration: {duration:.3f}s")
    print(f"[Trim] Requested: start={start_time:.3f}s, end={end_time:.3f}s")
    print(f"[Trim] Output duration should be: {trim_duration:.3f}s")

    # -ss BEFORE -i does input-side seeking (faster and more reliable for end
    # trimming); -t bounds the output duration; re-encoding gives a
    # frame-accurate cut.
    cmd = [
        "ffmpeg", "-y",
        "-accurate_seek",  # enable accurate seeking
        "-ss", str(start_time),  # input seeking (before -i)
        "-i", input_path,
        "-t", str(trim_duration),  # duration of output
        "-c:v", "libx264",  # re-encode video for accurate cut
        "-preset", "fast",
        "-crf", "18",  # high quality (lower = better)
        "-c:a", "aac",
        "-b:a", "128k",
        "-avoid_negative_ts", "make_zero",  # fix timestamp issues
        output_path
    ]

    print(f"[Trim] Command: {' '.join(cmd)}")

    try:
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=120,
        )

        if result.returncode != 0:
            error_msg = result.stderr[-300:] if result.stderr else "Unknown error"
            print(f"[Trim] FFmpeg error: {error_msg}")
            return False, f"Trim failed: {error_msg}"

        if os.path.exists(output_path):
            new_duration = await get_video_duration(output_path)
            if new_duration is None:
                # Probing the result failed, but the trim itself succeeded.
                # (Previously this crashed on formatting None and was
                # mis-reported as a trim failure.)
                return True, "Video trimmed successfully"
            print(f"[Trim] Success! New duration: {new_duration:.3f}s (expected: {trim_duration:.3f}s)")
            print(f"[Trim] Difference from expected: {abs(new_duration - trim_duration):.3f}s")
            return True, f"Video trimmed successfully ({new_duration:.1f}s)"
        else:
            print("[Trim] Error: Output file not created")
            return False, "Output file not created"

    except subprocess.TimeoutExpired:
        print("[Trim] Error: Timeout")
        return False, "Trim operation timed out"
    except Exception as e:
        print(f"[Trim] Error: {str(e)}")
        return False, f"Trim error: {str(e)}"
|
|
|
|
|
|
async def _trim_with_exclude_regions(
|
|
input_path: str,
|
|
output_path: str,
|
|
start_time: float,
|
|
end_time: float,
|
|
exclude_regions: list,
|
|
) -> Tuple[bool, str]:
|
|
"""
|
|
Trim video with exclude regions - cuts out specified sections and concatenates remaining parts.
|
|
|
|
Uses FFmpeg's filter_complex with trim and concat filters.
|
|
"""
|
|
import tempfile
|
|
|
|
print(f"[Trim] Trimming with {len(exclude_regions)} exclude regions")
|
|
print(f"[Trim] Main range: {start_time:.3f}s - {end_time:.3f}s")
|
|
for i, region in enumerate(exclude_regions):
|
|
print(f"[Trim] Exclude region {i}: {region['start']:.3f}s - {region['end']:.3f}s")
|
|
|
|
# Sort and merge overlapping exclude regions
|
|
sorted_regions = sorted(exclude_regions, key=lambda r: r['start'])
|
|
merged_regions = []
|
|
for region in sorted_regions:
|
|
# Clip region to main trim range
|
|
region_start = max(region['start'], start_time)
|
|
region_end = min(region['end'], end_time)
|
|
if region_start >= region_end:
|
|
continue # Skip invalid regions
|
|
|
|
if merged_regions and region_start <= merged_regions[-1]['end']:
|
|
merged_regions[-1]['end'] = max(merged_regions[-1]['end'], region_end)
|
|
else:
|
|
merged_regions.append({'start': region_start, 'end': region_end})
|
|
|
|
if not merged_regions:
|
|
# No valid exclude regions, use simple trim
|
|
print("[Trim] No valid exclude regions after merging, using simple trim")
|
|
return await trim_video(input_path, output_path, start_time, end_time, None)
|
|
|
|
# Calculate keep segments (inverse of exclude regions)
|
|
keep_segments = []
|
|
current_pos = start_time
|
|
|
|
for region in merged_regions:
|
|
if current_pos < region['start']:
|
|
keep_segments.append({'start': current_pos, 'end': region['start']})
|
|
current_pos = region['end']
|
|
|
|
# Add final segment if there's remaining time
|
|
if current_pos < end_time:
|
|
keep_segments.append({'start': current_pos, 'end': end_time})
|
|
|
|
if not keep_segments:
|
|
return False, "No video segments remaining after excluding regions"
|
|
|
|
print(f"[Trim] Keep segments: {keep_segments}")
|
|
|
|
# Calculate expected output duration
|
|
expected_duration = sum(seg['end'] - seg['start'] for seg in keep_segments)
|
|
print(f"[Trim] Expected output duration: {expected_duration:.3f}s")
|
|
|
|
# Build FFmpeg filter_complex for concatenation
|
|
# Each segment needs: trim, setpts for video; atrim, asetpts for audio
|
|
video_filters = []
|
|
audio_filters = []
|
|
segment_labels = []
|
|
|
|
for i, seg in enumerate(keep_segments):
|
|
seg_duration = seg['end'] - seg['start']
|
|
# Video filter: trim and reset timestamps
|
|
video_filters.append(
|
|
f"[0:v]trim=start={seg['start']:.6f}:end={seg['end']:.6f},setpts=PTS-STARTPTS[v{i}]"
|
|
)
|
|
# Audio filter: atrim and reset timestamps
|
|
audio_filters.append(
|
|
f"[0:a]atrim=start={seg['start']:.6f}:end={seg['end']:.6f},asetpts=PTS-STARTPTS[a{i}]"
|
|
)
|
|
segment_labels.append(f"[v{i}][a{i}]")
|
|
|
|
# Concat filter
|
|
concat_input = "".join(segment_labels)
|
|
filter_complex = ";".join(video_filters + audio_filters)
|
|
filter_complex += f";{concat_input}concat=n={len(keep_segments)}:v=1:a=1[outv][outa]"
|
|
|
|
cmd = [
|
|
"ffmpeg", "-y",
|
|
"-i", input_path,
|
|
"-filter_complex", filter_complex,
|
|
"-map", "[outv]",
|
|
"-map", "[outa]",
|
|
"-c:v", "libx264",
|
|
"-preset", "fast",
|
|
"-crf", "18",
|
|
"-c:a", "aac",
|
|
"-b:a", "128k",
|
|
"-avoid_negative_ts", "make_zero",
|
|
output_path
|
|
]
|
|
|
|
print(f"[Trim] Command: ffmpeg -y -i {input_path} -filter_complex [complex] -map [outv] -map [outa] ...")
|
|
print(f"[Trim] Filter complex: {filter_complex[:200]}..." if len(filter_complex) > 200 else f"[Trim] Filter complex: {filter_complex}")
|
|
|
|
try:
|
|
result = await asyncio.to_thread(
|
|
subprocess.run,
|
|
cmd,
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=300, # Longer timeout for complex operations
|
|
)
|
|
|
|
if result.returncode != 0:
|
|
error_msg = result.stderr[-500:] if result.stderr else "Unknown error"
|
|
print(f"[Trim] FFmpeg error: {error_msg}")
|
|
return False, f"Trim with exclude regions failed: {error_msg}"
|
|
|
|
if os.path.exists(output_path):
|
|
new_duration = await get_video_duration(output_path)
|
|
print(f"[Trim] Success! New duration: {new_duration:.3f}s (expected: {expected_duration:.3f}s)")
|
|
return True, f"Video trimmed successfully ({new_duration:.1f}s, excluded {len(merged_regions)} regions)"
|
|
else:
|
|
print("[Trim] Error: Output file not created")
|
|
return False, "Output file not created"
|
|
|
|
except subprocess.TimeoutExpired:
|
|
print("[Trim] Error: Timeout")
|
|
return False, "Trim operation timed out"
|
|
except Exception as e:
|
|
print(f"[Trim] Error: {str(e)}")
|
|
return False, f"Trim error: {str(e)}"
|
|
|
|
|
|
async def extract_frame(
    video_path: str,
    output_path: str,
    timestamp: float,
) -> Tuple[bool, str]:
    """
    Grab a single still image from a video.

    Args:
        video_path: Path to input video
        output_path: Path for output image (jpg/png)
        timestamp: Time in seconds

    Returns:
        Tuple of (success, message)
    """
    if not os.path.exists(video_path):
        return False, f"Video not found: {video_path}"

    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # -ss before -i seeks quickly to the timestamp; -frames:v 1 emits exactly
    # one frame at high quality (-q:v 2).
    grab_cmd = [
        "ffmpeg", "-y",
        "-ss", str(timestamp),
        "-i", video_path,
        "-frames:v", "1",
        "-q:v", "2",
        output_path
    ]

    try:
        proc = await asyncio.to_thread(
            subprocess.run,
            grab_cmd,
            capture_output=True,
            text=True,
            timeout=30,
        )
        if proc.returncode == 0 and os.path.exists(output_path):
            return True, "Frame extracted"
        return False, proc.stderr[-200:] if proc.stderr else "Unknown error"
    except Exception as e:
        return False, str(e)
|
|
|
|
|
|
async def get_audio_duration(audio_path: str) -> Optional[float]:
    """Return the duration of an audio file in seconds, or None on failure.

    Delegates to get_video_duration: the ffprobe format=duration query it
    issues works identically for audio-only files.
    """
    return await get_video_duration(audio_path)
|
|
|
|
|
|
async def extract_audio(video_path: str, output_path: str) -> Tuple[bool, str]:
    """Extract audio from video as mono 16 kHz PCM (Whisper-friendly).

    Args:
        video_path: Path to the source video
        output_path: Destination audio path (WAV recommended)

    Returns:
        Tuple of (success, message)
    """
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vn",  # drop video
        "-acodec", "pcm_s16le",  # raw PCM for STT
        "-ar", "16000",  # 16 kHz sample rate
        "-ac", "1",  # mono
        output_path
    ]

    try:
        # Run in a worker thread to avoid blocking the event loop
        # (was a direct subprocess.run, unlike the other async helpers).
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=120,
        )
        if result.returncode == 0:
            return True, "Audio extracted"
        # Truncate stderr like the other helpers so messages stay readable.
        return False, result.stderr[-300:] if result.stderr else "Unknown error"
    except Exception as e:
        return False, str(e)
|
|
|
|
|
|
async def extract_audio_with_noise_reduction(
    video_path: str,
    output_path: str,
    noise_reduction_level: str = "medium"
) -> Tuple[bool, str]:
    """
    Extract audio from video with noise reduction for better STT accuracy.

    Args:
        video_path: Path to input video
        output_path: Path for output audio (WAV format recommended)
        noise_reduction_level: "light", "medium", or "heavy"

    Returns:
        Tuple of (success, message)
    """
    if not os.path.exists(video_path):
        return False, f"Video file not found: {video_path}"

    # Band-pass for speech: drop rumble below 80 Hz and hiss above 8 kHz.
    audio_chain = ["highpass=f=80", "lowpass=f=8000"]

    # Level-specific denoising on top of the band-pass; "light" adds nothing
    # beyond the band-pass itself.
    if noise_reduction_level == "medium":
        # FFT denoiser: nr = reduction amount, nf = noise floor, nt=w = white noise
        audio_chain.append("afftdn=nf=-25:nr=10:nt=w")
    elif noise_reduction_level == "heavy":
        audio_chain.append("afftdn=nf=-20:nr=20:nt=w")
        # Dynamic range compression to even out the volume.
        audio_chain.append("acompressor=threshold=-20dB:ratio=4:attack=5:release=50")

    # Loudness normalization as the final stage.
    audio_chain.append("loudnorm=I=-16:TP=-1.5:LRA=11")

    extraction_cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vn",  # no video
        "-af", ",".join(audio_chain),
        "-acodec", "pcm_s16le",  # PCM format for Whisper
        "-ar", "16000",  # 16kHz sample rate (Whisper optimal)
        "-ac", "1",  # mono
        output_path
    ]

    try:
        # Off-load the blocking FFmpeg call to a worker thread.
        proc = await asyncio.to_thread(
            subprocess.run,
            extraction_cmd,
            capture_output=True,
            text=True,
            timeout=120,
        )
    except subprocess.TimeoutExpired:
        return False, "Audio extraction timed out"
    except Exception as e:
        return False, f"Audio extraction error: {str(e)}"

    if proc.returncode != 0:
        failure_detail = proc.stderr[-300:] if proc.stderr else "Unknown error"
        return False, f"Audio extraction failed: {failure_detail}"

    if not os.path.exists(output_path):
        return False, "Output file not created"

    return True, f"Audio extracted with {noise_reduction_level} noise reduction"
|
|
|
|
|
|
async def analyze_audio_noise_level(audio_path: str) -> Optional[dict]:
    """
    Analyze audio to detect noise level.

    Returns dict with mean_volume / max_volume (dB) parsed from FFmpeg's
    volumedetect filter, or None on failure or if nothing was parsed.
    """
    cmd = [
        "ffmpeg",
        "-i", audio_path,
        "-af", "volumedetect",
        "-f", "null",  # analyze only, discard output
        "-"
    ]

    try:
        # Run in a worker thread to avoid blocking the event loop
        # (was a direct subprocess.run, unlike the other async helpers).
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=60,
        )

        # volumedetect reports on stderr; the return code is intentionally
        # not checked, matching the original behavior.
        info = {}
        for line in result.stderr.split('\n'):
            if 'mean_volume' in line:
                info['mean_volume'] = float(line.split(':')[1].strip().replace(' dB', ''))
            elif 'max_volume' in line:
                info['max_volume'] = float(line.split(':')[1].strip().replace(' dB', ''))

        return info if info else None

    except Exception:
        return None
|
|
|
|
|
|
async def has_audio_stream(video_path: str) -> bool:
    """
    Check if video file has an audio stream.

    Returns:
        True if video has audio, False otherwise (including probe failure)
    """
    cmd = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "a",  # select only audio streams
        "-show_entries", "stream=codec_type",
        "-of", "csv=p=0",
        video_path
    ]

    try:
        # Run in a worker thread to avoid blocking the event loop
        # (was a direct subprocess.run, unlike the other async helpers).
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=30,
        )
        # If there's audio, ffprobe will output "audio".
        return "audio" in result.stdout.lower()
    except Exception:
        return False
|
|
|
|
|
|
async def get_audio_volume_info(video_path: str) -> Optional[dict]:
    """
    Get audio volume information to detect silent audio.

    Returns:
        dict with mean_volume / max_volume (dB), or None if no audio or error
    """
    # First check if audio stream exists; volumedetect is pointless otherwise.
    if not await has_audio_stream(video_path):
        return None

    cmd = [
        "ffmpeg",
        "-i", video_path,
        "-af", "volumedetect",
        "-f", "null",  # analyze only, discard output
        "-"
    ]

    try:
        # Run in a worker thread to avoid blocking the event loop
        # (was a direct subprocess.run, unlike the other async helpers).
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=60,
        )

        # volumedetect reports its measurements on stderr.
        info = {}
        for line in result.stderr.split('\n'):
            if 'mean_volume' in line:
                info['mean_volume'] = float(line.split(':')[1].strip().replace(' dB', ''))
            elif 'max_volume' in line:
                info['max_volume'] = float(line.split(':')[1].strip().replace(' dB', ''))

        return info if info else None

    except Exception:
        return None
|
|
|
|
|
|
def is_audio_silent(volume_info: Optional[dict], threshold_db: float = -50.0) -> bool:
    """Report whether measured audio is effectively silent.

    Args:
        volume_info: dict from get_audio_volume_info (may be None).
        threshold_db: Mean volume below this counts as silent (default -50dB).

    Returns:
        True when there is no volume info at all, or the mean volume falls
        below the threshold; False otherwise.
    """
    # No measurement (or empty dict) means no usable audio — treat as silent.
    if not volume_info:
        return True
    # Missing mean_volume defaults to -100 dB, i.e. silent.
    return volume_info.get('mean_volume', -100) < threshold_db
|