Initial commit: YouTube Shorts maker application
Features:
- Video download from TikTok/Douyin using yt-dlp
- Audio transcription with OpenAI Whisper
- GPT-4 translation (direct/summarize/rewrite modes)
- Subtitle generation with ASS format
- Video trimming with frame-accurate preview
- BGM integration with volume control
- Intro text overlay support
- Thumbnail generation with text overlay

Tech stack:
- Backend: FastAPI, Python 3.11+
- Frontend: React, Vite, TailwindCSS
- Video processing: FFmpeg
- AI: OpenAI Whisper, GPT-4

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
158
backend/app/services/downloader.py
Normal file
158
backend/app/services/downloader.py
Normal file
@@ -0,0 +1,158 @@
|
||||
import asyncio
import json
import os
import re
import subprocess
from typing import Optional, Tuple
from urllib.parse import urlsplit

from app.config import settings
|
||||
|
||||
|
||||
def detect_platform(url: str) -> str:
    """Detect the video platform a URL belongs to.

    Matching is case-insensitive. The URL's host name is checked first, so
    a platform name that merely appears in the path or query string (for
    example a TikTok account called ``bilibili_fan``) does not override the
    real host. If the host does not identify a platform, or no host can be
    parsed (missing scheme, share text pasted around the link, malformed
    URL), the whole string is searched instead.

    Args:
        url: Video page URL, or text containing one.

    Returns:
        One of ``"douyin"``, ``"kuaishou"``, ``"bilibili"``, ``"youtube"``,
        ``"tiktok"`` or ``"unknown"``.
    """
    # Order matters: the first platform with a matching marker wins.
    markers = (
        ("douyin", ("douyin",)),  # also matches "iesdouyin"
        ("kuaishou", ("kuaishou", "gifshow")),
        ("bilibili", ("bilibili",)),
        ("youtube", ("youtube", "youtu.be")),
        ("tiktok", ("tiktok",)),
    )

    try:
        host = urlsplit(url.strip()).hostname or ""
    except ValueError:
        # urlsplit rejects malformed netlocs (e.g. an unclosed IPv6 bracket).
        host = ""

    # Host first; the full string is the fallback when the host is unhelpful.
    for candidate in (host.lower(), url.lower()):
        for platform, needles in markers:
            if any(needle in candidate for needle in needles):
                return platform
    return "unknown"
|
||||
|
||||
|
||||
def sanitize_filename(filename: str) -> str:
    """Return *filename* with filesystem-unsafe characters replaced.

    Each of the characters ``<``, ``>``, ``:``, ``"``, ``/``, backslash,
    ``|``, ``?`` and ``*`` is replaced by an underscore, as is every ASCII
    control character (0x00-0x1F). Control characters are included because
    a NUL byte in a path makes Python's ``os`` functions raise.

    The result is truncated to at most 100 characters; note that this can
    cut off a trailing extension on very long names.

    Args:
        filename: Proposed file name (not a full path; separators are
            replaced).

    Returns:
        The sanitized name, at most 100 characters long. An empty input
        yields an empty string.
    """
    safe = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", filename)
    return safe[:100]
|
||||
|
||||
|
||||
def get_cookies_path(platform: str) -> Optional[str]:
    """Locate a cookies file to use for *platform*.

    Files are looked up in a ``cookies`` directory placed under
    ``os.path.dirname(settings.DOWNLOAD_DIR)``. A platform-specific file
    (``<platform>.txt``, e.g. ``douyin.txt``) takes precedence over the
    shared ``cookies.txt``.

    Args:
        platform: Platform identifier used to build the file name.

    Returns:
        Path of the first candidate that exists, or ``None`` when neither
        file is present.
    """
    cookies_root = os.path.join(os.path.dirname(settings.DOWNLOAD_DIR), "cookies")

    # Most specific candidate first, generic fallback second.
    for candidate_name in (f"{platform}.txt", "cookies.txt"):
        candidate = os.path.join(cookies_root, candidate_name)
        if os.path.exists(candidate):
            return candidate

    return None
|
||||
|
||||
|
||||
async def download_video(url: str, job_id: str) -> Tuple[bool, str, Optional[str]]:
    """Download a video with yt-dlp into a per-job directory.

    The file is written to ``<settings.DOWNLOAD_DIR>/<job_id>/``. Cookies,
    browser impersonation and a proxy are added to the yt-dlp command line
    depending on the detected platform and the configured settings.

    The yt-dlp process is run in a worker thread (``asyncio.to_thread``) so
    that the event loop is not blocked while the download is in progress.

    Args:
        url: Video page URL to download.
        job_id: Identifier used as the name of the output sub-directory.

    Returns:
        Tuple of ``(success, message, video_path)``. ``video_path`` is
        ``None`` whenever ``success`` is ``False``. Failures of the yt-dlp
        run (non-zero exit, timeout, missing executable, ...) are reported
        through the tuple rather than raised.
    """
    output_dir = os.path.join(settings.DOWNLOAD_DIR, job_id)
    os.makedirs(output_dir, exist_ok=True)

    # The title part of the file name is capped at 50 characters.
    output_template = os.path.join(output_dir, "%(title).50s.%(ext)s")

    # yt-dlp command with options for Chinese platforms
    cmd = [
        "yt-dlp",
        "--no-playlist",
        "-f", "best[ext=mp4]/best",
        "--merge-output-format", "mp4",
        "-o", output_template,
        # NOTE(review): this turns off TLS certificate verification;
        # confirm it is still required for the targeted platforms.
        "--no-check-certificate",
        "--socket-timeout", "30",
        "--retries", "3",
        "--user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    ]

    platform = detect_platform(url)

    # Add cookies if available (required for Douyin, Kuaishou)
    cookies_path = get_cookies_path(platform)
    if cookies_path:
        cmd.extend(["--cookies", cookies_path])
        print(f"Using cookies from: {cookies_path}")
    elif platform in ("douyin", "kuaishou", "bilibili"):
        # No cookies file: fall back to the cookies of a local Chrome
        # profile. Only Chrome is tried; other browsers are not consulted.
        cmd.extend(["--cookies-from-browser", "chrome"])
        print(f"Using cookies from Chrome browser for {platform}")

    # Platform-specific options
    if platform in ("douyin", "kuaishou"):
        # Use browser impersonation for anti-bot bypass
        cmd.extend([
            "--impersonate", "chrome-123:macos-14",
            "--extractor-args", "generic:impersonate",
        ])

    # Add proxy if configured (for geo-restricted platforms)
    if settings.PROXY_URL:
        cmd.extend(["--proxy", settings.PROXY_URL])
        print(f"Using proxy: {settings.PROXY_URL}")

    # "--" ends option parsing, so a URL that starts with "-" can never be
    # interpreted as a yt-dlp option.
    cmd.extend(["--", url])

    try:
        # subprocess.run blocks until yt-dlp exits; running it in a worker
        # thread keeps the event loop free for other requests.
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=300,  # 5 minute timeout
        )

        if result.returncode != 0:
            error_msg = result.stderr or result.stdout or "Unknown error"
            return False, f"Download failed: {error_msg}", None

        # Find the downloaded file; sorted so the pick is deterministic if
        # the directory holds more than one video.
        for file in sorted(os.listdir(output_dir)):
            if file.endswith((".mp4", ".webm", ".mkv")):
                video_path = os.path.join(output_dir, file)
                return True, "Download successful", video_path

        return False, "No video file found after download", None

    except subprocess.TimeoutExpired:
        return False, "Download timed out (5 minutes)", None
    except Exception as e:
        # Best-effort boundary: callers receive the failure via the tuple.
        return False, f"Download error: {str(e)}", None
|
||||
|
||||
|
||||
def get_video_info(url: str) -> Optional[dict]:
    """Fetch video metadata via yt-dlp without downloading the media.

    Runs ``yt-dlp -j --no-download`` (with the configured proxy, if any) and
    parses its standard output as JSON. ``--no-playlist`` is passed so that
    a URL carrying both a video and a playlist is treated as that single
    video, the same way ``download_video`` treats it.

    This is a best-effort lookup: any failure (non-zero exit status,
    timeout, missing executable, unparsable output) is printed and results
    in ``None`` instead of an exception.

    Args:
        url: Video page URL to inspect.

    Returns:
        The decoded JSON metadata, or ``None`` if it could not be obtained.
    """
    cmd = [
        "yt-dlp",
        "-j",  # JSON output
        "--no-download",
        "--no-playlist",
    ]

    # Add proxy if configured
    if settings.PROXY_URL:
        cmd.extend(["--proxy", settings.PROXY_URL])

    # "--" ends option parsing, so a URL that starts with "-" can never be
    # interpreted as a yt-dlp option.
    cmd.extend(["--", url])

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=60,
        )

        if result.returncode != 0:
            reason = (result.stderr or "").strip() or "Unknown error"
            print(f"Video info lookup failed: {reason}")
            return None

        return json.loads(result.stdout)

    except Exception as e:
        # Deliberately broad: callers rely on None rather than an exception,
        # but the cause is reported instead of being silently dropped.
        print(f"Video info lookup error: {e}")
        return None
|
||||
Reference in New Issue
Block a user