"""Helpers for downloading videos and reading their metadata via yt-dlp."""

import asyncio
import functools
import json
import os
import re
import subprocess
from typing import Optional, Tuple

from app.config import settings

# Ordered (platform, URL markers) pairs; the first platform whose marker
# occurs in the URL wins, so the order here is significant.
_PLATFORM_MARKERS = (
    ("douyin", ("douyin", "iesdouyin")),
    ("kuaishou", ("kuaishou", "gifshow")),
    ("bilibili", ("bilibili",)),
    ("youtube", ("youtube", "youtu.be")),
    ("tiktok", ("tiktok",)),
)

# Characters replaced by "_" in sanitize_filename().
_INVALID_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*]')
_MAX_FILENAME_LENGTH = 100

# File extensions accepted as the downloaded video in download_video().
_VIDEO_EXTENSIONS = (".mp4", ".webm", ".mkv")

_DOWNLOAD_TIMEOUT_SECONDS = 300  # 5 minutes
_INFO_TIMEOUT_SECONDS = 60


def detect_platform(url: str) -> str:
    """Detect the video platform from a URL.

    Matching is a case-insensitive substring test against the whole URL.

    Args:
        url: The video URL to classify.

    Returns:
        One of "douyin", "kuaishou", "bilibili", "youtube", "tiktok",
        or "unknown" when no known marker occurs in the URL.
    """
    # Host names are case-insensitive, so normalise before matching.
    lowered = url.lower()
    for platform, markers in _PLATFORM_MARKERS:
        if any(marker in lowered for marker in markers):
            return platform
    return "unknown"


def sanitize_filename(filename: str) -> str:
    """Sanitize a filename so it is safe for the filesystem.

    Each of the characters ``< > : " / \\ | ? *`` is replaced with an
    underscore and the result is truncated to 100 characters.

    Args:
        filename: The raw filename.

    Returns:
        The sanitized, length-limited filename.
    """
    cleaned = _INVALID_FILENAME_CHARS.sub("_", filename)
    return cleaned[:_MAX_FILENAME_LENGTH]


def get_cookies_path(platform: str) -> Optional[str]:
    """Get the cookies file path for a platform.

    Looks in the "cookies" directory that sits next to
    ``settings.DOWNLOAD_DIR``. A platform-specific file
    (e.g. ``douyin.txt``) takes precedence over the generic
    ``cookies.txt``.

    Args:
        platform: Platform name as returned by detect_platform().

    Returns:
        Path of the first existing cookies file, or None if neither exists.
    """
    cookies_dir = os.path.join(os.path.dirname(settings.DOWNLOAD_DIR), "cookies")
    for candidate_name in (f"{platform}.txt", "cookies.txt"):
        candidate = os.path.join(cookies_dir, candidate_name)
        if os.path.exists(candidate):
            return candidate
    return None


def _build_download_command(url: str, output_template: str) -> list:
    """Assemble the yt-dlp argument list used by download_video()."""
    # yt-dlp command with options for Chinese platforms
    cmd = [
        "yt-dlp",
        "--no-playlist",
        "-f", "best[ext=mp4]/best",
        "--merge-output-format", "mp4",
        "-o", output_template,
        "--no-check-certificate",
        "--socket-timeout", "30",
        "--retries", "3",
        "--user-agent",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    ]

    platform = detect_platform(url)

    # Add cookies if available (required for Douyin, Kuaishou)
    cookies_path = get_cookies_path(platform)
    if cookies_path:
        cmd.extend(["--cookies", cookies_path])
        print(f"Using cookies from: {cookies_path}")
    elif platform in ("douyin", "kuaishou", "bilibili"):
        # No cookies file: fall back to the cookies of the local Chrome profile.
        cmd.extend(["--cookies-from-browser", "chrome"])
        print(f"Using cookies from Chrome browser for {platform}")

    # Platform-specific options
    if platform in ("douyin", "kuaishou"):
        # Use browser impersonation for anti-bot bypass
        cmd.extend([
            "--impersonate", "chrome-123:macos-14",
            "--extractor-args", "generic:impersonate",
        ])

    # Add proxy if configured (for geo-restricted platforms)
    if settings.PROXY_URL:
        cmd.extend(["--proxy", settings.PROXY_URL])
        print(f"Using proxy: {settings.PROXY_URL}")

    cmd.append(url)
    return cmd


async def download_video(url: str, job_id: str) -> Tuple[bool, str, Optional[str]]:
    """Download a video using yt-dlp.

    The video is written into ``settings.DOWNLOAD_DIR/<job_id>/``, which is
    created if it does not exist. yt-dlp runs in the event loop's default
    executor so the blocking subprocess call does not stall other
    coroutines while the download is in progress.

    Args:
        url: URL of the video to download.
        job_id: Identifier used as the name of the per-job output directory.

    Returns:
        Tuple of (success, message, video_path). ``video_path`` is None
        whenever ``success`` is False.
    """
    output_dir = os.path.join(settings.DOWNLOAD_DIR, job_id)
    os.makedirs(output_dir, exist_ok=True)

    output_template = os.path.join(output_dir, "%(title).50s.%(ext)s")
    cmd = _build_download_command(url, output_template)

    run_ytdlp = functools.partial(
        subprocess.run,
        cmd,
        capture_output=True,
        text=True,
        timeout=_DOWNLOAD_TIMEOUT_SECONDS,
    )

    try:
        # subprocess.run blocks until yt-dlp exits; keep it off the loop thread.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, run_ytdlp)

        if result.returncode != 0:
            error_msg = result.stderr or result.stdout or "Unknown error"
            return False, f"Download failed: {error_msg}", None

        # Find the downloaded file; sorted so the pick is deterministic
        # if the directory holds more than one candidate.
        for file_name in sorted(os.listdir(output_dir)):
            if file_name.endswith(_VIDEO_EXTENSIONS):
                video_path = os.path.join(output_dir, file_name)
                return True, "Download successful", video_path

        return False, "No video file found after download", None

    except subprocess.TimeoutExpired:
        return False, "Download timed out (5 minutes)", None
    except asyncio.CancelledError:
        # Never report task cancellation as a download error.
        raise
    except Exception as e:
        # Boundary handler: callers expect a result tuple, not an exception.
        return False, f"Download error: {str(e)}", None


def get_video_info(url: str) -> Optional[dict]:
    """Get video metadata without downloading.

    Runs ``yt-dlp -j --no-download`` and parses its standard output as JSON.

    Args:
        url: URL of the video to inspect.

    Returns:
        The parsed JSON metadata, or None if yt-dlp could not be started,
        timed out, exited with a non-zero status, or produced output that
        is not valid JSON.
    """
    cmd = [
        "yt-dlp",
        "-j",  # JSON output
        "--no-download",
        # Match download_video(): describe only the single video, so the
        # output is one JSON document rather than one per playlist entry.
        "--no-playlist",
    ]

    # Add proxy if configured
    if settings.PROXY_URL:
        cmd.extend(["--proxy", settings.PROXY_URL])

    cmd.append(url)

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=_INFO_TIMEOUT_SECONDS,
        )
        if result.returncode == 0:
            return json.loads(result.stdout)
    except (OSError, subprocess.SubprocessError, ValueError):
        # Best effort: missing binary, timeout, or undecodable/invalid JSON
        # output all mean "no metadata available".
        pass

    return None