Features: - Video download from TikTok/Douyin using yt-dlp - Audio transcription with OpenAI Whisper - GPT-4 translation (direct/summarize/rewrite modes) - Subtitle generation with ASS format - Video trimming with frame-accurate preview - BGM integration with volume control - Intro text overlay support - Thumbnail generation with text overlay Tech stack: - Backend: FastAPI, Python 3.11+ - Frontend: React, Vite, TailwindCSS - Video processing: FFmpeg - AI: OpenAI Whisper, GPT-4 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
159 lines
4.6 KiB
Python
159 lines
4.6 KiB
Python
import subprocess
|
|
import os
|
|
import re
|
|
from typing import Optional, Tuple
|
|
from app.config import settings
|
|
|
|
|
|
def detect_platform(url: str) -> str:
    """Detect the video platform a URL belongs to.

    Matching is a case-insensitive substring search over the whole URL, so
    short links and mixed-case hosts (e.g. ``https://YouTu.be/...``) are
    recognised as well.

    Args:
        url: The video page or share URL.

    Returns:
        One of ``"douyin"``, ``"kuaishou"``, ``"bilibili"``, ``"youtube"``,
        ``"tiktok"``, or ``"unknown"`` when no known marker is present.
    """
    # Host names are case-insensitive, so normalise before matching.
    lowered = url.lower()

    # Checked top to bottom; the first platform with a matching marker wins.
    platform_markers = (
        ("douyin", ("douyin", "iesdouyin")),
        ("kuaishou", ("kuaishou", "gifshow")),
        ("bilibili", ("bilibili",)),
        ("youtube", ("youtube", "youtu.be")),
        ("tiktok", ("tiktok",)),
    )
    for platform, markers in platform_markers:
        if any(marker in lowered for marker in markers):
            return platform
    return "unknown"
|
|
|
|
|
|
# Characters replaced by sanitize_filename: punctuation that is reserved in
# file names on common filesystems, plus ASCII control characters (NUL,
# newline, tab, ...), which must never end up in a file name either.
_UNSAFE_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')

# Upper bound on the length of a sanitized name, in characters.
_MAX_FILENAME_LENGTH = 100


def sanitize_filename(filename: str) -> str:
    """Sanitize a file name so it is safe to use on the filesystem.

    Every reserved punctuation character (``< > : " / \\ | ? *``) and every
    ASCII control character is replaced with an underscore, then the result
    is truncated to 100 characters.

    Args:
        filename: The raw file name (not a path; separators are replaced).

    Returns:
        The sanitized name. An empty input yields an empty string.
    """
    cleaned = _UNSAFE_FILENAME_CHARS.sub("_", filename)
    return cleaned[:_MAX_FILENAME_LENGTH]
|
|
|
|
|
|
def get_cookies_path(platform: str) -> Optional[str]:
    """Locate a cookies file for the given platform.

    Files are looked up in a ``cookies`` directory under
    ``os.path.dirname(settings.DOWNLOAD_DIR)``.

    Args:
        platform: Platform name used to build the ``<platform>.txt`` file name.

    Returns:
        The path of ``<platform>.txt`` if it exists, otherwise the path of
        ``cookies.txt`` if that exists, otherwise ``None``.
    """
    base_dir = os.path.join(os.path.dirname(settings.DOWNLOAD_DIR), "cookies")

    # Candidates in priority order: platform-specific file (e.g. douyin.txt)
    # first, then the generic fallback.
    for candidate_name in (f"{platform}.txt", "cookies.txt"):
        candidate = os.path.join(base_dir, candidate_name)
        if os.path.exists(candidate):
            return candidate

    return None
|
|
|
|
|
|
async def download_video(url: str, job_id: str) -> Tuple[bool, str, Optional[str]]:
    """
    Download a video with yt-dlp into a per-job directory.

    The file is written below ``settings.DOWNLOAD_DIR/<job_id>``. The yt-dlp
    process runs in a worker thread so the event loop stays responsive
    while the download (up to five minutes) is in progress.

    Args:
        url: Video page or share URL to download.
        job_id: Identifier used as the name of the output sub-directory.

    Returns:
        Tuple of (success, message, video_path). ``video_path`` is ``None``
        whenever ``success`` is ``False``.
    """
    # Function-scope import: only needed to off-load the blocking call below.
    import asyncio

    output_dir = os.path.join(settings.DOWNLOAD_DIR, job_id)
    os.makedirs(output_dir, exist_ok=True)

    # Title is capped at 50 characters by the yt-dlp template itself.
    output_template = os.path.join(output_dir, "%(title).50s.%(ext)s")

    # yt-dlp command with options for Chinese platforms
    cmd = [
        "yt-dlp",
        "--no-playlist",
        "-f", "best[ext=mp4]/best",
        "--merge-output-format", "mp4",
        "-o", output_template,
        # NOTE(review): this disables TLS certificate verification.
        "--no-check-certificate",
        "--socket-timeout", "30",
        "--retries", "3",
        "--user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    ]

    platform = detect_platform(url)

    # Add cookies if available (required for Douyin, Kuaishou)
    cookies_path = get_cookies_path(platform)
    if cookies_path:
        cmd.extend(["--cookies", cookies_path])
        print(f"Using cookies from: {cookies_path}")
    elif platform in ["douyin", "kuaishou", "bilibili"]:
        # No cookies file: fall back to the local browser's cookies.
        # Only Chrome is tried; there is no fallback to other browsers.
        cmd.extend(["--cookies-from-browser", "chrome"])
        print(f"Using cookies from Chrome browser for {platform}")

    # Platform-specific options
    if platform in ["douyin", "kuaishou"]:
        # Use browser impersonation for anti-bot bypass
        cmd.extend([
            "--impersonate", "chrome-123:macos-14",
            "--extractor-args", "generic:impersonate",
        ])

    # Add proxy if configured (for geo-restricted platforms)
    if settings.PROXY_URL:
        cmd.extend(["--proxy", settings.PROXY_URL])
        # NOTE(review): the proxy URL may embed credentials - confirm that
        # printing it verbatim is acceptable.
        print(f"Using proxy: {settings.PROXY_URL}")

    # "--" ends option parsing, so a URL that starts with "-" is treated as a
    # URL instead of being interpreted as a yt-dlp option.
    cmd.extend(["--", url])

    try:
        # subprocess.run blocks until yt-dlp exits; running it in a thread
        # keeps this coroutine from stalling the whole event loop.
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=300,  # 5 minute timeout
        )

        if result.returncode != 0:
            error_msg = result.stderr or result.stdout or "Unknown error"
            return False, f"Download failed: {error_msg}", None

        # Find the downloaded file. Sorting makes the pick deterministic if
        # the directory holds more than one candidate.
        for file_name in sorted(os.listdir(output_dir)):
            if file_name.endswith((".mp4", ".webm", ".mkv")):
                video_path = os.path.join(output_dir, file_name)
                return True, "Download successful", video_path

        return False, "No video file found after download", None

    except subprocess.TimeoutExpired:
        return False, "Download timed out (5 minutes)", None
    except Exception as e:
        # Boundary handler: callers expect a result tuple, not an exception
        # (e.g. when the yt-dlp executable is missing).
        return False, f"Download error: {str(e)}", None
|
|
|
|
|
|
def get_video_info(url: str) -> Optional[dict]:
    """Get video metadata from yt-dlp without downloading the media.

    Runs ``yt-dlp -j --no-download`` (through ``settings.PROXY_URL`` when one
    is configured) with a 60 second timeout and parses its JSON output.

    Args:
        url: Video page or share URL to inspect.

    Returns:
        The parsed JSON document, or ``None`` when yt-dlp cannot be run,
        times out, exits with a non-zero status, or prints output that is
        not valid JSON.
    """
    import json

    cmd = [
        "yt-dlp",
        "-j",  # JSON output
        "--no-download",
    ]

    # Add proxy if configured
    if settings.PROXY_URL:
        cmd.extend(["--proxy", settings.PROXY_URL])

    # "--" ends option parsing, so a URL that starts with "-" is treated as a
    # URL instead of being interpreted as a yt-dlp option.
    cmd.extend(["--", url])

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=60,
        )
    except (subprocess.SubprocessError, OSError, ValueError):
        # Best-effort lookup: timeout, missing/unrunnable executable, or
        # arguments/output that cannot be encoded/decoded all mean "no info".
        return None

    if result.returncode != 0:
        return None

    try:
        return json.loads(result.stdout)
    except ValueError:
        # json.JSONDecodeError is a ValueError: output was not a JSON document.
        return None
|