Initial commit: YouTube Shorts maker application
Features: - Video download from TikTok/Douyin using yt-dlp - Audio transcription with OpenAI Whisper - GPT-4 translation (direct/summarize/rewrite modes) - Subtitle generation with ASS format - Video trimming with frame-accurate preview - BGM integration with volume control - Intro text overlay support - Thumbnail generation with text overlay Tech stack: - Backend: FastAPI, Python 3.11+ - Frontend: React, Vite, TailwindCSS - Video processing: FFmpeg - AI: OpenAI Whisper, GPT-4 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
1
backend/app/__init__.py
Normal file
1
backend/app/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Shorts Maker Backend
|
||||
53
backend/app/config.py
Normal file
53
backend/app/config.py
Normal file
@@ -0,0 +1,53 @@
|
||||
from pydantic_settings import BaseSettings
|
||||
from functools import lru_cache
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Application configuration loaded from the environment / project .env.

    Read via pydantic-settings; every field can be overridden by an
    environment variable of the same name.
    """

    # API Keys
    OPENAI_API_KEY: str = ""
    PIXABAY_API_KEY: str = ""  # Optional: for Pixabay music search
    FREESOUND_API_KEY: str = ""  # Optional: for Freesound API (https://freesound.org/apiv2/apply)

    # Directories (relative to the backend working directory)
    DOWNLOAD_DIR: str = "data/downloads"
    PROCESSED_DIR: str = "data/processed"
    BGM_DIR: str = "data/bgm"

    # Whisper settings
    WHISPER_MODEL: str = "medium"  # small, medium, large

    # Redis
    REDIS_URL: str = "redis://redis:6379/0"

    # OpenAI settings
    OPENAI_MODEL: str = "gpt-4o-mini"  # gpt-4o-mini, gpt-4o, gpt-4-turbo
    TRANSLATION_MAX_TOKENS: int = 1000  # Max tokens for translation (cost control)
    TRANSLATION_MODE: str = "rewrite"  # direct, summarize, rewrite

    # GPT Prompt Customization
    # (Korean defaults are user-facing prompt text and must stay as-is.)
    GPT_ROLE: str = "친근한 유튜브 쇼츠 자막 작가"  # GPT persona/role: "friendly YouTube Shorts subtitle writer"
    GPT_TONE: str = "존댓말"  # speech level: 존댓말 (polite), 반말 (casual), 격식체 (formal)
    GPT_STYLE: str = ""  # Additional style instructions (optional)

    # Processing
    DEFAULT_FONT_SIZE: int = 24
    DEFAULT_FONT_COLOR: str = "white"
    DEFAULT_BGM_VOLUME: float = 0.3

    # Server
    PORT: int = 3000  # Frontend port

    # Proxy (for geo-restricted platforms like Douyin)
    PROXY_URL: str = ""  # http://host:port or socks5://host:port

    class Config:
        # NOTE(review): pydantic v1-style inner Config; pydantic-settings v2
        # still honors it, but SettingsConfigDict is the modern form.
        env_file = "../.env"  # Project root .env file
        extra = "ignore"  # Ignore extra fields in .env
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def get_settings():
    """Build the Settings object once and hand back the cached instance."""
    return Settings()


# Process-wide singleton imported throughout the app.
settings = get_settings()
|
||||
64
backend/app/main.py
Normal file
64
backend/app/main.py
Normal file
@@ -0,0 +1,64 @@
|
||||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from contextlib import asynccontextmanager
|
||||
import os
|
||||
|
||||
from app.routers import download, process, bgm, jobs, fonts
|
||||
from app.config import settings
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create data directories and report BGM library status on startup."""
    # Startup: make sure every working directory exists.
    for directory in (settings.DOWNLOAD_DIR, settings.PROCESSED_DIR, settings.BGM_DIR):
        os.makedirs(directory, exist_ok=True)

    # Count available BGM tracks so the operator sees the library state.
    audio_exts = ('.mp3', '.wav', '.m4a', '.ogg')
    bgm_files = []
    if os.path.exists(settings.BGM_DIR):
        bgm_files = [f for f in os.listdir(settings.BGM_DIR) if f.endswith(audio_exts)]

    if not bgm_files:
        print("[Startup] No BGM files found. Upload BGM via /api/bgm/upload or download from Pixabay/Mixkit")
    else:
        names = ', '.join(bgm_files[:3])
        suffix = f'... (+{len(bgm_files) - 3} more)' if len(bgm_files) > 3 else ''
        print(f"[Startup] Found {len(bgm_files)} BGM files: {names}{suffix}")

    yield
    # Shutdown: nothing to clean up.
|
||||
# Shutdown
|
||||
|
||||
|
||||
app = FastAPI(
    title="Shorts Maker API",
    description="중국 쇼츠 영상을 한글 자막으로 변환하는 서비스",
    version="1.0.0",
    lifespan=lifespan,
)

# CORS: wide-open for local development.
# NOTE(review): browsers reject allow_origins=["*"] combined with
# allow_credentials=True for credentialed requests — tighten before production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Static files for downloaded/processed videos and BGM.
# Fix: use the configured directories instead of hard-coded "data/..." paths
# so the mounts stay in sync with Settings overrides (defaults are identical).
app.mount("/static/downloads", StaticFiles(directory=settings.DOWNLOAD_DIR), name="downloads")
app.mount("/static/processed", StaticFiles(directory=settings.PROCESSED_DIR), name="processed")
app.mount("/static/bgm", StaticFiles(directory=settings.BGM_DIR), name="bgm")

# Routers
app.include_router(download.router, prefix="/api/download", tags=["Download"])
app.include_router(process.router, prefix="/api/process", tags=["Process"])
app.include_router(bgm.router, prefix="/api/bgm", tags=["BGM"])
app.include_router(jobs.router, prefix="/api/jobs", tags=["Jobs"])
app.include_router(fonts.router, prefix="/api/fonts", tags=["Fonts"])


@app.get("/api/health")
async def health_check():
    """Liveness probe for containers / load balancers."""
    return {"status": "healthy", "service": "shorts-maker"}
|
||||
2
backend/app/models/__init__.py
Normal file
2
backend/app/models/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
from app.models.schemas import *
|
||||
from app.models.job_store import job_store
|
||||
91
backend/app/models/job_store.py
Normal file
91
backend/app/models/job_store.py
Normal file
@@ -0,0 +1,91 @@
|
||||
from typing import Dict, Optional
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
import json
|
||||
import os
|
||||
from app.models.schemas import JobInfo, JobStatus
|
||||
|
||||
|
||||
class JobStore:
    """Simple in-memory job store with JSON file persistence.

    Jobs live in a dict keyed by job_id and are mirrored to disk on every
    mutation so state survives process restarts. Not safe for concurrent
    writers across processes.
    """

    def __init__(self, persistence_file: str = "data/jobs.json"):
        self._jobs: Dict[str, JobInfo] = {}
        self._persistence_file = persistence_file
        self._load_jobs()

    def _load_jobs(self):
        """Load jobs from disk on startup (best-effort)."""
        if not os.path.exists(self._persistence_file):
            return
        try:
            with open(self._persistence_file, "r") as f:
                data = json.load(f)
            for job_id, job_data in data.items():
                # Datetimes are persisted as ISO-8601 strings; restore objects.
                job_data["created_at"] = datetime.fromisoformat(job_data["created_at"])
                job_data["updated_at"] = datetime.fromisoformat(job_data["updated_at"])
                self._jobs[job_id] = JobInfo(**job_data)
        except Exception:
            # Deliberately best-effort: a corrupt persistence file must not
            # prevent the service from starting with an empty store.
            pass

    def _save_jobs(self):
        """Persist all jobs to disk atomically (temp file + os.replace)."""
        os.makedirs(os.path.dirname(self._persistence_file), exist_ok=True)
        data = {}
        for job_id, job in self._jobs.items():
            job_dict = job.model_dump()
            job_dict["created_at"] = job_dict["created_at"].isoformat()
            job_dict["updated_at"] = job_dict["updated_at"].isoformat()
            data[job_id] = job_dict
        # Fix: write to a sibling temp file and atomically swap it in, so a
        # crash mid-write cannot leave a truncated/corrupt jobs.json behind.
        tmp_path = f"{self._persistence_file}.tmp"
        with open(tmp_path, "w") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, self._persistence_file)

    def create_job(self, original_url: str) -> JobInfo:
        """Create, store, and persist a new PENDING job for *original_url*."""
        # 8 chars of a uuid4 keeps IDs short; collisions are vanishingly rare
        # at this scale, but regenerate if one ever occurs.
        job_id = str(uuid.uuid4())[:8]
        while job_id in self._jobs:
            job_id = str(uuid.uuid4())[:8]
        now = datetime.now()
        job = JobInfo(
            job_id=job_id,
            status=JobStatus.PENDING,
            created_at=now,
            updated_at=now,
            original_url=original_url,
        )
        self._jobs[job_id] = job
        self._save_jobs()
        return job

    def get_job(self, job_id: str) -> Optional[JobInfo]:
        """Return the job with this ID, or None if unknown."""
        return self._jobs.get(job_id)

    def update_job(self, job_id: str, **kwargs) -> Optional[JobInfo]:
        """Set known attributes on a job (unknown keys are ignored).

        Returns the updated job, or None if the ID is unknown.
        """
        job = self._jobs.get(job_id)
        if job:
            for key, value in kwargs.items():
                if hasattr(job, key):
                    setattr(job, key, value)
            job.updated_at = datetime.now()
            self._save_jobs()
        return job

    def list_jobs(self, limit: int = 50) -> list[JobInfo]:
        """Return up to *limit* jobs, newest first."""
        jobs = sorted(self._jobs.values(), key=lambda j: j.created_at, reverse=True)
        return jobs[:limit]

    def delete_job(self, job_id: str) -> bool:
        """Delete a job; returns True if it existed."""
        if self._jobs.pop(job_id, None) is not None:
            self._save_jobs()
            return True
        return False


# Global job store instance
job_store = JobStore()
|
||||
279
backend/app/models/schemas.py
Normal file
279
backend/app/models/schemas.py
Normal file
@@ -0,0 +1,279 @@
|
||||
from pydantic import BaseModel, HttpUrl
|
||||
from typing import Optional, List
|
||||
from enum import Enum
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class JobStatus(str, Enum):
    """Lifecycle states of a video-processing job (roughly in pipeline order)."""
    PENDING = "pending"
    DOWNLOADING = "downloading"
    READY_FOR_TRIM = "ready_for_trim"  # Download complete, ready for trimming
    TRIMMING = "trimming"  # Video trimming in progress
    EXTRACTING_AUDIO = "extracting_audio"  # Step 2: FFmpeg audio extraction
    NOISE_REDUCTION = "noise_reduction"  # Step 3: Noise reduction
    TRANSCRIBING = "transcribing"  # Step 4: Whisper STT
    TRANSLATING = "translating"  # Step 5: GPT translation
    AWAITING_REVIEW = "awaiting_review"  # Script ready, waiting for user review before rendering
    PROCESSING = "processing"  # Step 6: Video composition + BGM
    COMPLETED = "completed"
    FAILED = "failed"
    AWAITING_SUBTITLE = "awaiting_subtitle"  # No audio - waiting for manual subtitle input
|
||||
|
||||
|
||||
class DownloadRequest(BaseModel):
    """Request to download a source video by URL."""
    url: str
    platform: Optional[str] = None  # auto-detect if not provided


class DownloadResponse(BaseModel):
    """Response returned once a download job has been queued."""
    job_id: str
    status: JobStatus
    message: str


class SubtitleStyle(BaseModel):
    """Visual styling options for burned-in (ASS) subtitles."""
    font_size: int = 28
    font_color: str = "white"
    outline_color: str = "black"
    outline_width: int = 2
    position: str = "bottom"  # top, center, bottom
    font_name: str = "Pretendard"
    # Enhanced styling options
    bold: bool = True  # bold text (improves readability)
    shadow: int = 1  # shadow depth (0 = none, 1-4)
    background_box: bool = True  # opaque background box to cover original hard-subs
    background_opacity: str = "E0"  # background opacity (00 = transparent, FF = fully opaque, E0 = recommended)
    animation: str = "none"  # none, fade, pop (subtitle animation)


class TranslationModeEnum(str, Enum):
    """How the GPT translation step treats the source script."""
    DIRECT = "direct"  # literal translation (keeps original structure)
    SUMMARIZE = "summarize"  # summarize first, then translate
    REWRITE = "rewrite"  # full rewrite (recommended)


class ProcessRequest(BaseModel):
    """Request to run the full processing pipeline on a downloaded job."""
    job_id: str
    bgm_id: Optional[str] = None
    bgm_volume: float = 0.3
    subtitle_style: Optional[SubtitleStyle] = None
    keep_original_audio: bool = False
    translation_mode: Optional[str] = None  # direct, summarize, rewrite (default from settings)
    use_vocal_separation: bool = False  # Separate vocals from BGM before transcription


class ProcessResponse(BaseModel):
    """Response returned once processing has been queued."""
    job_id: str
    status: JobStatus
    message: str
|
||||
|
||||
|
||||
class TrimRequest(BaseModel):
    """Request to trim a video to a specific time range."""
    start_time: float  # Start time in seconds
    end_time: float  # End time in seconds
    reprocess: bool = False  # Whether to automatically reprocess after trimming (default: False for manual workflow)


class TranscribeRequest(BaseModel):
    """Request to start transcription (audio extraction + STT + translation)."""
    translation_mode: Optional[str] = "rewrite"  # direct, summarize, rewrite
    use_vocal_separation: bool = False  # Separate vocals from BGM before transcription


class RenderRequest(BaseModel):
    """Request to render final video with subtitles and BGM."""
    bgm_id: Optional[str] = None
    bgm_volume: float = 0.3
    subtitle_style: Optional[SubtitleStyle] = None
    keep_original_audio: bool = False
    # Intro text overlay (shown at beginning of video for YouTube Shorts thumbnail)
    intro_text: Optional[str] = None  # Max 10 characters recommended
    intro_duration: float = 0.7  # Duration of frozen frame with intro text (seconds)
    intro_font_size: int = 100  # Font size


class TrimResponse(BaseModel):
    """Response after trimming a video."""
    job_id: str
    success: bool
    message: str
    new_duration: Optional[float] = None


class VideoInfoResponse(BaseModel):
    """Video information for trimming UI."""
    duration: float
    width: Optional[int] = None
    height: Optional[int] = None
    thumbnail_url: Optional[str] = None


class TranscriptSegment(BaseModel):
    """One timed segment of the transcript, with optional translation."""
    start: float
    end: float
    text: str
    translated: Optional[str] = None


class JobInfo(BaseModel):
    """Full state of a processing job as tracked by the JobStore."""
    job_id: str
    status: JobStatus
    created_at: datetime
    updated_at: datetime
    original_url: Optional[str] = None
    video_path: Optional[str] = None
    output_path: Optional[str] = None
    transcript: Optional[List[TranscriptSegment]] = None
    error: Optional[str] = None
    progress: int = 0
    has_audio: Optional[bool] = None  # None = not checked, True = has audio, False = no audio
    audio_status: Optional[str] = None  # "ok", "no_audio_stream", "audio_silent"
    detected_language: Optional[str] = None  # Whisper detected language (e.g., "zh", "en", "ko")


class BGMInfo(BaseModel):
    """A background-music track available in the BGM library."""
    id: str
    name: str
    duration: float
    path: str


class BGMUploadResponse(BaseModel):
    """Response after a BGM file is uploaded or downloaded into the library."""
    id: str
    name: str
    message: str
|
||||
|
||||
|
||||
# Korean font definitions
class FontInfo(BaseModel):
    """Font information for subtitle styling."""
    id: str  # font ID (system font name)
    name: str  # display name
    style: str  # style category
    recommended_for: List[str]  # recommended content types
    download_url: Optional[str] = None  # download link
    license: str = "Free for commercial use"


# Free commercial-use Korean fonts popular for Shorts
# (name/style values are user-facing Korean strings and must stay as-is)
KOREAN_FONTS = {
    # Default system fonts (installed on most systems)
    "NanumGothic": FontInfo(
        id="NanumGothic",
        name="나눔고딕",
        style="깔끔, 기본",
        recommended_for=["tutorial", "news", "general"],
        download_url="https://hangeul.naver.com/font",
        license="OFL (Open Font License)",
    ),
    "NanumGothicBold": FontInfo(
        id="NanumGothicBold",
        name="나눔고딕 Bold",
        style="깔끔, 강조",
        recommended_for=["tutorial", "news", "general"],
        download_url="https://hangeul.naver.com/font",
        license="OFL (Open Font License)",
    ),
    "NanumSquareRound": FontInfo(
        id="NanumSquareRound",
        name="나눔스퀘어라운드",
        style="둥글, 친근",
        recommended_for=["travel", "lifestyle", "vlog"],
        download_url="https://hangeul.naver.com/font",
        license="OFL (Open Font License)",
    ),

    # Popular free fonts (require separate installation)
    "Pretendard": FontInfo(
        id="Pretendard",
        name="프리텐다드",
        style="현대적, 깔끔",
        recommended_for=["tutorial", "tech", "business"],
        download_url="https://github.com/orioncactus/pretendard",
        license="OFL (Open Font License)",
    ),
    "SpoqaHanSansNeo": FontInfo(
        id="SpoqaHanSansNeo",
        name="스포카 한 산스 Neo",
        style="깔끔, 가독성",
        recommended_for=["tutorial", "tech", "presentation"],
        download_url="https://github.com/spoqa/spoqa-han-sans",
        license="OFL (Open Font License)",
    ),
    "GmarketSans": FontInfo(
        id="GmarketSans",
        name="G마켓 산스",
        style="둥글, 친근",
        recommended_for=["shopping", "review", "lifestyle"],
        download_url="https://corp.gmarket.com/fonts",
        license="Free for commercial use",
    ),

    # Distinctive / character fonts
    "BMDoHyeon": FontInfo(
        id="BMDoHyeon",
        name="배민 도현체",
        style="손글씨, 유머",
        recommended_for=["comedy", "mukbang", "cooking"],
        download_url="https://www.woowahan.com/fonts",
        license="OFL (Open Font License)",
    ),
    "BMJua": FontInfo(
        id="BMJua",
        name="배민 주아체",
        style="귀여움, 캐주얼",
        recommended_for=["cooking", "lifestyle", "kids"],
        download_url="https://www.woowahan.com/fonts",
        license="OFL (Open Font License)",
    ),
    "Cafe24Ssurround": FontInfo(
        id="Cafe24Ssurround",
        name="카페24 써라운드",
        style="강조, 임팩트",
        recommended_for=["gaming", "reaction", "highlight"],
        download_url="https://fonts.cafe24.com/",
        license="Free for commercial use",
    ),
    "Cafe24SsurroundAir": FontInfo(
        id="Cafe24SsurroundAir",
        name="카페24 써라운드 에어",
        style="가벼움, 깔끔",
        recommended_for=["vlog", "daily", "lifestyle"],
        download_url="https://fonts.cafe24.com/",
        license="Free for commercial use",
    ),

    # Title / emphasis fonts
    "BlackHanSans": FontInfo(
        id="BlackHanSans",
        name="검은고딕",
        style="굵음, 강렬",
        recommended_for=["gaming", "sports", "action"],
        download_url="https://fonts.google.com/specimen/Black+Han+Sans",
        license="OFL (Open Font License)",
    ),
    "DoHyeon": FontInfo(
        id="DoHyeon",
        name="도현",
        style="손글씨, 자연스러움",
        recommended_for=["vlog", "cooking", "asmr"],
        download_url="https://fonts.google.com/specimen/Do+Hyeon",
        license="OFL (Open Font License)",
    ),
}


# Recommended fonts by content type (IDs must exist in KOREAN_FONTS)
FONT_RECOMMENDATIONS = {
    "tutorial": ["Pretendard", "SpoqaHanSansNeo", "NanumGothic"],
    "gaming": ["Cafe24Ssurround", "BlackHanSans", "GmarketSans"],
    "cooking": ["BMDoHyeon", "BMJua", "DoHyeon"],
    "comedy": ["BMDoHyeon", "Cafe24Ssurround", "GmarketSans"],
    "travel": ["NanumSquareRound", "Cafe24SsurroundAir", "GmarketSans"],
    "news": ["Pretendard", "NanumGothic", "SpoqaHanSansNeo"],
    "asmr": ["DoHyeon", "NanumSquareRound", "Cafe24SsurroundAir"],
    "fitness": ["BlackHanSans", "Cafe24Ssurround", "GmarketSans"],
    "tech": ["Pretendard", "SpoqaHanSansNeo", "NanumGothic"],
    "lifestyle": ["GmarketSans", "NanumSquareRound", "Cafe24SsurroundAir"],
}
|
||||
1
backend/app/routers/__init__.py
Normal file
1
backend/app/routers/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from app.routers import download, process, bgm, jobs
|
||||
578
backend/app/routers/bgm.py
Normal file
578
backend/app/routers/bgm.py
Normal file
@@ -0,0 +1,578 @@
|
||||
import os
|
||||
import aiofiles
|
||||
import httpx
|
||||
from typing import List
|
||||
from fastapi import APIRouter, UploadFile, File, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from app.models.schemas import BGMInfo, BGMUploadResponse, TranscriptSegment
|
||||
from app.services.video_processor import get_audio_duration
|
||||
from app.services.bgm_provider import (
|
||||
get_free_bgm_sources,
|
||||
search_freesound,
|
||||
download_freesound,
|
||||
search_and_download_bgm,
|
||||
BGMSearchResult,
|
||||
)
|
||||
from app.services.bgm_recommender import (
|
||||
recommend_bgm_for_script,
|
||||
get_preset_recommendation,
|
||||
BGMRecommendation,
|
||||
BGM_PRESETS,
|
||||
)
|
||||
from app.services.default_bgm import (
|
||||
initialize_default_bgm,
|
||||
get_default_bgm_list,
|
||||
check_default_bgm_status,
|
||||
DEFAULT_BGM_TRACKS,
|
||||
)
|
||||
from app.config import settings
|
||||
|
||||
router = APIRouter()


class BGMDownloadRequest(BaseModel):
    """Request to download BGM from URL."""
    url: str
    name: str


class BGMRecommendRequest(BaseModel):
    """Request for BGM recommendation based on script."""
    segments: List[dict]  # TranscriptSegment as dict
    use_translated: bool = True


class FreesoundSearchRequest(BaseModel):
    """Request to search Freesound."""
    query: str
    min_duration: int = 10
    max_duration: int = 180
    page: int = 1
    page_size: int = 15
    commercial_only: bool = True  # only commercially usable licenses (CC0, CC-BY)


class FreesoundDownloadRequest(BaseModel):
    """Request to download from Freesound."""
    sound_id: str
    name: str  # Custom name for the downloaded file


class AutoBGMRequest(BaseModel):
    """Request for automatic BGM search and download."""
    keywords: List[str]  # Search keywords (from BGM recommendation)
    max_duration: int = 120
    commercial_only: bool = True  # only commercially usable licenses
|
||||
|
||||
|
||||
@router.get("/", response_model=list[BGMInfo])
async def list_bgm():
    """List all available BGM files in the configured BGM directory."""
    bgm_list = []

    if not os.path.exists(settings.BGM_DIR):
        return bgm_list

    for filename in os.listdir(settings.BGM_DIR):
        if filename.endswith((".mp3", ".wav", ".m4a", ".ogg")):
            filepath = os.path.join(settings.BGM_DIR, filename)
            bgm_id = os.path.splitext(filename)[0]

            duration = await get_audio_duration(filepath)

            bgm_list.append(BGMInfo(
                id=bgm_id,
                name=bgm_id.replace("_", " ").replace("-", " ").title(),
                duration=duration or 0,
                # Fix: serve the actual file through the /static/bgm mount
                # (this f-string was garbled to a literal placeholder).
                path=f"/static/bgm/{filename}"
            ))

    return bgm_list
|
||||
|
||||
|
||||
@router.post("/upload", response_model=BGMUploadResponse)
async def upload_bgm(
    file: UploadFile = File(...),
    name: str | None = None
):
    """Upload a new BGM file."""
    if not file.filename:
        raise HTTPException(status_code=400, detail="No filename provided")

    # Only common audio container formats are accepted.
    allowed_extensions = (".mp3", ".wav", ".m4a", ".ogg")
    if not file.filename.lower().endswith(allowed_extensions):
        raise HTTPException(
            status_code=400,
            detail=f"Invalid file type. Allowed: {allowed_extensions}"
        )

    # Derive a stable snake_case ID from the custom name or the upload's basename.
    base_name = name if name else os.path.splitext(file.filename)[0]
    bgm_id = base_name.lower().replace(" ", "_")

    # Keep the uploaded file's original extension when saving.
    ext = os.path.splitext(file.filename)[1].lower()
    destination = os.path.join(settings.BGM_DIR, f"{bgm_id}{ext}")

    os.makedirs(settings.BGM_DIR, exist_ok=True)

    payload = await file.read()
    async with aiofiles.open(destination, 'wb') as out_file:
        await out_file.write(payload)

    return BGMUploadResponse(
        id=bgm_id,
        name=name or file.filename,
        message="BGM uploaded successfully"
    )
|
||||
|
||||
|
||||
@router.delete("/{bgm_id}")
async def delete_bgm(bgm_id: str):
    """Delete a BGM file by ID, whichever audio extension it was stored with."""
    candidate_paths = (
        os.path.join(settings.BGM_DIR, f"{bgm_id}{ext}")
        for ext in (".mp3", ".wav", ".m4a", ".ogg")
    )
    for filepath in candidate_paths:
        if os.path.exists(filepath):
            os.remove(filepath)
            return {"message": f"BGM '{bgm_id}' deleted"}

    raise HTTPException(status_code=404, detail="BGM not found")
|
||||
|
||||
|
||||
@router.get("/sources/free", response_model=dict)
async def get_free_sources():
    """Get list of recommended free BGM sources for commercial use."""
    sources = get_free_bgm_sources()
    # NOTE: the Korean strings below are user-facing payload text — keep as-is.
    return {
        "sources": sources,
        "notice": "이 소스들은 상업적 사용이 가능한 무료 음악을 제공합니다. 각 사이트의 라이선스를 확인하세요.",
        "recommended": [
            {
                "name": "Pixabay Music",
                "url": "https://pixabay.com/music/search/",
                "why": "CC0 라이선스, 저작권 표기 불필요, 쇼츠용 짧은 트랙 많음",
                "search_tips": ["upbeat", "energetic", "chill", "cinematic", "funny"],
            },
            {
                "name": "Mixkit",
                "url": "https://mixkit.co/free-stock-music/",
                "why": "고품질, 카테고리별 정리, 상업적 무료 사용",
                "search_tips": ["short", "intro", "background"],
            },
        ],
    }
|
||||
|
||||
|
||||
@router.post("/download-url", response_model=BGMUploadResponse)
async def download_bgm_from_url(request: BGMDownloadRequest):
    """
    Download BGM from external URL (Pixabay, Mixkit, etc.)

    Use this to download free BGM files directly from their source URLs.
    Only an allow-list of known free-music domains is accepted.

    Raises:
        HTTPException 400: disallowed domain, or non-200 upstream response
        HTTPException 408: upstream download timed out
        HTTPException 500: any other failure
    """
    url = request.url
    name = request.name.lower().replace(" ", "_")

    # Validate URL - allow trusted free music sources
    allowed_domains = [
        "pixabay.com",
        "cdn.pixabay.com",
        "mixkit.co",
        "assets.mixkit.co",
        "uppbeat.io",
        "freemusicarchive.org",
    ]

    from urllib.parse import urlparse
    parsed = urlparse(url)
    domain = parsed.netloc.lower()

    # Fix: exact or subdomain suffix match. The previous substring check
    # (`allowed in domain`) would accept e.g. "pixabay.com.evil.com".
    if not any(domain == allowed or domain.endswith("." + allowed) for allowed in allowed_domains):
        raise HTTPException(
            status_code=400,
            detail=f"URL must be from allowed sources: {', '.join(allowed_domains)}"
        )

    try:
        async with httpx.AsyncClient(follow_redirects=True) as client:
            response = await client.get(url, timeout=60)

            if response.status_code != 200:
                raise HTTPException(
                    status_code=400,
                    detail=f"Failed to download: HTTP {response.status_code}"
                )

            # Determine file extension from Content-Type, falling back to the URL.
            content_type = response.headers.get("content-type", "")
            if "mpeg" in content_type or url.endswith(".mp3"):
                ext = ".mp3"
            elif "wav" in content_type or url.endswith(".wav"):
                ext = ".wav"
            elif "ogg" in content_type or url.endswith(".ogg"):
                ext = ".ogg"
            else:
                ext = ".mp3"  # default: most free-music CDNs serve MP3

            # Save file
            os.makedirs(settings.BGM_DIR, exist_ok=True)
            filepath = os.path.join(settings.BGM_DIR, f"{name}{ext}")

            async with aiofiles.open(filepath, 'wb') as f:
                await f.write(response.content)

            return BGMUploadResponse(
                id=name,
                name=request.name,
                message=f"BGM downloaded from {domain}"
            )

    except HTTPException:
        # Fix: the broad `except Exception` below used to swallow the
        # HTTP-status HTTPException raised above and re-wrap it as a 500.
        raise
    except httpx.TimeoutException:
        raise HTTPException(status_code=408, detail="Download timed out")
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Download failed: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/recommend")
async def recommend_bgm(request: BGMRecommendRequest):
    """
    AI-powered BGM recommendation based on script content.

    Analyzes the script mood and suggests appropriate background music.
    Returns matched BGM from library if available, otherwise provides search keywords.
    """
    # Convert dict to TranscriptSegment
    segments = [TranscriptSegment(**seg) for seg in request.segments]

    # Get available BGM list (IDs are the filenames without extension)
    available_bgm = []
    if os.path.exists(settings.BGM_DIR):
        for filename in os.listdir(settings.BGM_DIR):
            if filename.endswith((".mp3", ".wav", ".m4a", ".ogg")):
                bgm_id = os.path.splitext(filename)[0]
                available_bgm.append({
                    "id": bgm_id,
                    "name": bgm_id.replace("_", " ").replace("-", " "),
                })

    # Get recommendation
    # NOTE(review): returns (success, message, recommendation); presumably the
    # mood analysis is LLM-backed — confirm in services/bgm_recommender.
    success, message, recommendation = await recommend_bgm_for_script(
        segments,
        available_bgm,
        use_translated=request.use_translated,
    )

    if not success:
        raise HTTPException(status_code=500, detail=message)

    # Build Pixabay search URL from the top 3 suggested keywords
    search_keywords = "+".join(recommendation.search_keywords[:3])
    pixabay_url = f"https://pixabay.com/music/search/{search_keywords}/"

    return {
        "recommendation": {
            "mood": recommendation.mood,
            "energy": recommendation.energy,
            "reasoning": recommendation.reasoning,
            "suggested_genres": recommendation.suggested_genres,
            "search_keywords": recommendation.search_keywords,
        },
        "matched_bgm": recommendation.matched_bgm_id,
        "search_urls": {
            "pixabay": pixabay_url,
            "mixkit": f"https://mixkit.co/free-stock-music/?q={search_keywords}",
        },
        "message": message,
    }
|
||||
|
||||
|
||||
@router.get("/recommend/presets")
async def get_bgm_presets():
    """
    Get predefined BGM presets for common content types.

    Use these presets for quick BGM selection without AI analysis.
    """
    # Project each preset entry into the public response shape.
    presets = {
        content_type: {
            "mood": preset_info["mood"],
            "keywords": preset_info["keywords"],
            "description": f"Best for {content_type} content",
        }
        for content_type, preset_info in BGM_PRESETS.items()
    }

    return {
        "presets": presets,
        "usage": "Use content_type parameter with /recommend/preset/{content_type}",
    }
|
||||
|
||||
|
||||
@router.get("/recommend/preset/{content_type}")
async def get_preset_bgm(content_type: str):
    """
    Get BGM recommendation for a specific content type.

    Available types: cooking, fitness, tutorial, comedy, travel, asmr, news, gaming
    """
    recommendation = get_preset_recommendation(content_type)

    if not recommendation:
        available_types = list(BGM_PRESETS.keys())
        raise HTTPException(
            status_code=404,
            detail=f"Unknown content type. Available: {', '.join(available_types)}"
        )

    # Check for matching BGM in library: first library file whose name
    # contains any recommended keyword wins.
    if os.path.exists(settings.BGM_DIR):
        for filename in os.listdir(settings.BGM_DIR):
            if filename.endswith((".mp3", ".wav", ".m4a", ".ogg")):
                bgm_id = os.path.splitext(filename)[0]
                bgm_name = bgm_id.lower()

                # Check if any keyword matches
                for keyword in recommendation.search_keywords:
                    if keyword in bgm_name:
                        recommendation.matched_bgm_id = bgm_id
                        break

                if recommendation.matched_bgm_id:
                    break

    # Top-3 keywords joined for the external search URLs below.
    search_keywords = "+".join(recommendation.search_keywords[:3])

    return {
        "content_type": content_type,
        "recommendation": {
            "mood": recommendation.mood,
            "energy": recommendation.energy,
            "suggested_genres": recommendation.suggested_genres,
            "search_keywords": recommendation.search_keywords,
        },
        "matched_bgm": recommendation.matched_bgm_id,
        "search_urls": {
            "pixabay": f"https://pixabay.com/music/search/{search_keywords}/",
            "mixkit": f"https://mixkit.co/free-stock-music/?q={search_keywords}",
        },
    }
|
||||
|
||||
|
||||
@router.post("/freesound/search")
async def search_freesound_api(request: FreesoundSearchRequest):
    """
    Search for music on Freesound.

    Freesound API provides 500,000+ CC licensed sounds.
    Get your API key at: https://freesound.org/apiv2/apply

    Set commercial_only=true (default) to only return CC0 licensed sounds
    that can be used commercially without attribution.
    """
    success, message, results = await search_freesound(
        query=request.query,
        min_duration=request.min_duration,
        max_duration=request.max_duration,
        page=request.page,
        page_size=request.page_size,
        commercial_only=request.commercial_only,
    )

    if not success:
        raise HTTPException(status_code=400, detail=message)

    # CC0 and plain CC-BY are safe for commercial use without/with attribution.
    def is_commercial_ok(license_str: str) -> bool:
        return "CC0" in license_str or license_str == "CC BY (Attribution)"

    return {
        "message": message,
        "commercial_only": request.commercial_only,
        "results": [
            {
                "id": r.id,
                "title": r.title,
                "duration": r.duration,
                "tags": r.tags,
                "license": r.license,
                "commercial_use_ok": is_commercial_ok(r.license),
                "preview_url": r.preview_url,
                "source": r.source,
            }
            for r in results
        ],
        "search_url": f"https://freesound.org/search/?q={request.query}",
    }
|
||||
|
||||
|
||||
@router.post("/freesound/download", response_model=BGMUploadResponse)
|
||||
async def download_freesound_api(request: FreesoundDownloadRequest):
|
||||
"""
|
||||
Download a sound from Freesound by ID.
|
||||
|
||||
Downloads the high-quality preview (128kbps MP3).
|
||||
"""
|
||||
name = request.name.lower().replace(" ", "_")
|
||||
name = "".join(c for c in name if c.isalnum() or c == "_")
|
||||
|
||||
success, message, file_path = await download_freesound(
|
||||
sound_id=request.sound_id,
|
||||
output_dir=settings.BGM_DIR,
|
||||
filename=name,
|
||||
)
|
||||
|
||||
if not success:
|
||||
raise HTTPException(status_code=400, detail=message)
|
||||
|
||||
return BGMUploadResponse(
|
||||
id=name,
|
||||
name=request.name,
|
||||
message=message,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/auto-download")
|
||||
async def auto_download_bgm(request: AutoBGMRequest):
|
||||
"""
|
||||
Automatically search and download BGM based on keywords.
|
||||
|
||||
Use this with keywords from /recommend endpoint to auto-download matching BGM.
|
||||
Requires FREESOUND_API_KEY to be configured.
|
||||
|
||||
Set commercial_only=true (default) to only download CC0 licensed sounds
|
||||
that can be used commercially without attribution.
|
||||
"""
|
||||
success, message, file_path, matched_result = await search_and_download_bgm(
|
||||
keywords=request.keywords,
|
||||
output_dir=settings.BGM_DIR,
|
||||
max_duration=request.max_duration,
|
||||
commercial_only=request.commercial_only,
|
||||
)
|
||||
|
||||
if not success:
|
||||
return {
|
||||
"success": False,
|
||||
"message": message,
|
||||
"downloaded": None,
|
||||
"suggestion": "Configure FREESOUND_API_KEY or manually download from Pixabay/Mixkit",
|
||||
}
|
||||
|
||||
# Get duration of downloaded file
|
||||
duration = 0
|
||||
if file_path:
|
||||
duration = await get_audio_duration(file_path) or 0
|
||||
|
||||
# Check if license is commercially usable
|
||||
license_name = matched_result.license if matched_result else ""
|
||||
commercial_ok = "CC0" in license_name or license_name == "CC BY (Attribution)"
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"message": message,
|
||||
"downloaded": {
|
||||
"id": os.path.splitext(os.path.basename(file_path))[0] if file_path else None,
|
||||
"name": matched_result.title if matched_result else None,
|
||||
"duration": duration,
|
||||
"license": license_name,
|
||||
"commercial_use_ok": commercial_ok,
|
||||
"source": "freesound",
|
||||
"path": f"/static/bgm/{os.path.basename(file_path)}" if file_path else None,
|
||||
},
|
||||
"original": {
|
||||
"freesound_id": matched_result.id if matched_result else None,
|
||||
"tags": matched_result.tags if matched_result else [],
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@router.get("/defaults/status")
|
||||
async def get_default_bgm_status():
|
||||
"""
|
||||
Check status of default BGM tracks.
|
||||
|
||||
Returns which default tracks are installed and which are missing.
|
||||
"""
|
||||
status = check_default_bgm_status(settings.BGM_DIR)
|
||||
|
||||
# Add track details
|
||||
tracks = []
|
||||
for track in DEFAULT_BGM_TRACKS:
|
||||
installed = track.id in status["installed_ids"]
|
||||
tracks.append({
|
||||
"id": track.id,
|
||||
"name": track.name,
|
||||
"category": track.category,
|
||||
"description": track.description,
|
||||
"installed": installed,
|
||||
})
|
||||
|
||||
return {
|
||||
"total": status["total"],
|
||||
"installed": status["installed"],
|
||||
"missing": status["missing"],
|
||||
"tracks": tracks,
|
||||
}
|
||||
|
||||
|
||||
@router.post("/defaults/initialize")
|
||||
async def initialize_default_bgms(force: bool = False):
|
||||
"""
|
||||
Download default BGM tracks.
|
||||
|
||||
Downloads pre-selected royalty-free BGM tracks (Pixabay License).
|
||||
Use force=true to re-download all tracks.
|
||||
|
||||
These tracks are free for commercial use without attribution.
|
||||
"""
|
||||
downloaded, skipped, errors = await initialize_default_bgm(
|
||||
settings.BGM_DIR,
|
||||
force=force,
|
||||
)
|
||||
|
||||
return {
|
||||
"success": len(errors) == 0,
|
||||
"downloaded": downloaded,
|
||||
"skipped": skipped,
|
||||
"errors": errors,
|
||||
"message": f"Downloaded {downloaded} tracks, skipped {skipped} existing" if downloaded > 0
|
||||
else "All default tracks already installed" if skipped > 0
|
||||
else "Failed to download tracks",
|
||||
}
|
||||
|
||||
|
||||
@router.get("/defaults/list")
|
||||
async def list_default_bgms():
|
||||
"""
|
||||
Get list of available default BGM tracks with metadata.
|
||||
|
||||
Returns information about all pre-configured default tracks.
|
||||
"""
|
||||
tracks = await get_default_bgm_list()
|
||||
status = check_default_bgm_status(settings.BGM_DIR)
|
||||
|
||||
for track in tracks:
|
||||
track["installed"] = track["id"] in status["installed_ids"]
|
||||
|
||||
return {
|
||||
"tracks": tracks,
|
||||
"total": len(tracks),
|
||||
"installed": status["installed"],
|
||||
"license": "Pixabay License (Free for commercial use, no attribution required)",
|
||||
}
|
||||
|
||||
|
||||
@router.get("/{bgm_id}")
|
||||
async def get_bgm(bgm_id: str):
|
||||
"""Get BGM info by ID."""
|
||||
for ext in (".mp3", ".wav", ".m4a", ".ogg"):
|
||||
filepath = os.path.join(settings.BGM_DIR, f"{bgm_id}{ext}")
|
||||
if os.path.exists(filepath):
|
||||
duration = await get_audio_duration(filepath)
|
||||
return BGMInfo(
|
||||
id=bgm_id,
|
||||
name=bgm_id.replace("_", " ").replace("-", " ").title(),
|
||||
duration=duration or 0,
|
||||
path=f"/static/bgm/{bgm_id}{ext}"
|
||||
)
|
||||
|
||||
raise HTTPException(status_code=404, detail="BGM not found")
|
||||
62
backend/app/routers/download.py
Normal file
62
backend/app/routers/download.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from fastapi import APIRouter, BackgroundTasks, HTTPException
|
||||
from app.models.schemas import DownloadRequest, DownloadResponse, JobStatus
|
||||
from app.models.job_store import job_store
|
||||
from app.services.downloader import download_video, detect_platform
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
async def download_task(job_id: str, url: str):
|
||||
"""Background task for downloading video."""
|
||||
job_store.update_job(job_id, status=JobStatus.DOWNLOADING, progress=10)
|
||||
|
||||
success, message, video_path = await download_video(url, job_id)
|
||||
|
||||
if success:
|
||||
job_store.update_job(
|
||||
job_id,
|
||||
status=JobStatus.READY_FOR_TRIM, # Ready for trimming step
|
||||
video_path=video_path,
|
||||
progress=30,
|
||||
)
|
||||
else:
|
||||
job_store.update_job(
|
||||
job_id,
|
||||
status=JobStatus.FAILED,
|
||||
error=message,
|
||||
)
|
||||
|
||||
|
||||
@router.post("/", response_model=DownloadResponse)
|
||||
async def start_download(
|
||||
request: DownloadRequest,
|
||||
background_tasks: BackgroundTasks
|
||||
):
|
||||
"""Start video download from URL."""
|
||||
platform = request.platform or detect_platform(request.url)
|
||||
|
||||
# Create job
|
||||
job = job_store.create_job(original_url=request.url)
|
||||
|
||||
# Start background download
|
||||
background_tasks.add_task(download_task, job.job_id, request.url)
|
||||
|
||||
return DownloadResponse(
|
||||
job_id=job.job_id,
|
||||
status=JobStatus.PENDING,
|
||||
message=f"Download started for {platform} video"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/platforms")
|
||||
async def get_supported_platforms():
|
||||
"""Get list of supported platforms."""
|
||||
return {
|
||||
"platforms": [
|
||||
{"id": "douyin", "name": "抖音 (Douyin)", "domains": ["douyin.com", "iesdouyin.com"]},
|
||||
{"id": "kuaishou", "name": "快手 (Kuaishou)", "domains": ["kuaishou.com", "gifshow.com"]},
|
||||
{"id": "bilibili", "name": "哔哩哔哩 (Bilibili)", "domains": ["bilibili.com"]},
|
||||
{"id": "tiktok", "name": "TikTok", "domains": ["tiktok.com"]},
|
||||
{"id": "youtube", "name": "YouTube", "domains": ["youtube.com", "youtu.be"]},
|
||||
]
|
||||
}
|
||||
163
backend/app/routers/fonts.py
Normal file
163
backend/app/routers/fonts.py
Normal file
@@ -0,0 +1,163 @@
|
||||
"""
|
||||
Fonts Router - Korean font management for subtitles.
|
||||
|
||||
Provides font listing and recommendations for YouTube Shorts subtitles.
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from app.models.schemas import FontInfo, KOREAN_FONTS, FONT_RECOMMENDATIONS
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/")
|
||||
async def list_fonts():
|
||||
"""
|
||||
List all available Korean fonts for subtitles.
|
||||
|
||||
Returns font information including:
|
||||
- id: System font name to use in subtitle_style.font_name
|
||||
- name: Display name in Korean
|
||||
- style: Font style description
|
||||
- recommended_for: Content types this font works well with
|
||||
- download_url: Where to download the font
|
||||
- license: Font license information
|
||||
"""
|
||||
fonts = []
|
||||
for font_id, font_info in KOREAN_FONTS.items():
|
||||
fonts.append({
|
||||
"id": font_info.id,
|
||||
"name": font_info.name,
|
||||
"style": font_info.style,
|
||||
"recommended_for": font_info.recommended_for,
|
||||
"download_url": font_info.download_url,
|
||||
"license": font_info.license,
|
||||
})
|
||||
|
||||
return {
|
||||
"fonts": fonts,
|
||||
"total": len(fonts),
|
||||
"default": "NanumGothic",
|
||||
"usage": "Set subtitle_style.font_name to the font id",
|
||||
}
|
||||
|
||||
|
||||
@router.get("/recommend/{content_type}")
|
||||
async def recommend_fonts(content_type: str):
|
||||
"""
|
||||
Get font recommendations for a specific content type.
|
||||
|
||||
Available content types:
|
||||
- tutorial: 튜토리얼, 강의
|
||||
- gaming: 게임, 리액션
|
||||
- cooking: 요리, 먹방
|
||||
- comedy: 코미디, 유머
|
||||
- travel: 여행, 브이로그
|
||||
- news: 뉴스, 정보
|
||||
- asmr: ASMR, 릴렉스
|
||||
- fitness: 운동, 피트니스
|
||||
- tech: 기술, IT
|
||||
- lifestyle: 라이프스타일, 일상
|
||||
"""
|
||||
content_type_lower = content_type.lower()
|
||||
|
||||
if content_type_lower not in FONT_RECOMMENDATIONS:
|
||||
available_types = list(FONT_RECOMMENDATIONS.keys())
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"Unknown content type. Available: {', '.join(available_types)}"
|
||||
)
|
||||
|
||||
recommended_ids = FONT_RECOMMENDATIONS[content_type_lower]
|
||||
recommendations = []
|
||||
|
||||
for font_id in recommended_ids:
|
||||
if font_id in KOREAN_FONTS:
|
||||
font = KOREAN_FONTS[font_id]
|
||||
recommendations.append({
|
||||
"id": font.id,
|
||||
"name": font.name,
|
||||
"style": font.style,
|
||||
"download_url": font.download_url,
|
||||
})
|
||||
|
||||
return {
|
||||
"content_type": content_type_lower,
|
||||
"recommendations": recommendations,
|
||||
"primary": recommended_ids[0] if recommended_ids else "NanumGothic",
|
||||
}
|
||||
|
||||
|
||||
@router.get("/categories")
|
||||
async def list_font_categories():
|
||||
"""
|
||||
List fonts grouped by style category.
|
||||
"""
|
||||
categories = {
|
||||
"clean": {
|
||||
"name": "깔끔/모던",
|
||||
"description": "정보성 콘텐츠, 튜토리얼에 적합",
|
||||
"fonts": ["Pretendard", "SpoqaHanSansNeo", "NanumGothic"],
|
||||
},
|
||||
"friendly": {
|
||||
"name": "친근/둥글",
|
||||
"description": "일상, 라이프스타일 콘텐츠에 적합",
|
||||
"fonts": ["GmarketSans", "NanumSquareRound", "Cafe24SsurroundAir"],
|
||||
},
|
||||
"handwriting": {
|
||||
"name": "손글씨/캐주얼",
|
||||
"description": "먹방, 요리, 유머 콘텐츠에 적합",
|
||||
"fonts": ["BMDoHyeon", "BMJua", "DoHyeon"],
|
||||
},
|
||||
"impact": {
|
||||
"name": "강조/임팩트",
|
||||
"description": "게임, 하이라이트, 리액션에 적합",
|
||||
"fonts": ["Cafe24Ssurround", "BlackHanSans"],
|
||||
},
|
||||
}
|
||||
|
||||
# Add font details to each category
|
||||
for category_id, category_info in categories.items():
|
||||
font_details = []
|
||||
for font_id in category_info["fonts"]:
|
||||
if font_id in KOREAN_FONTS:
|
||||
font = KOREAN_FONTS[font_id]
|
||||
font_details.append({
|
||||
"id": font.id,
|
||||
"name": font.name,
|
||||
})
|
||||
category_info["font_details"] = font_details
|
||||
|
||||
return {
|
||||
"categories": categories,
|
||||
}
|
||||
|
||||
|
||||
@router.get("/{font_id}")
|
||||
async def get_font(font_id: str):
|
||||
"""
|
||||
Get detailed information about a specific font.
|
||||
"""
|
||||
if font_id not in KOREAN_FONTS:
|
||||
available_fonts = list(KOREAN_FONTS.keys())
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"Font not found. Available fonts: {', '.join(available_fonts)}"
|
||||
)
|
||||
|
||||
font = KOREAN_FONTS[font_id]
|
||||
return {
|
||||
"id": font.id,
|
||||
"name": font.name,
|
||||
"style": font.style,
|
||||
"recommended_for": font.recommended_for,
|
||||
"download_url": font.download_url,
|
||||
"license": font.license,
|
||||
"usage_example": {
|
||||
"subtitle_style": {
|
||||
"font_name": font.id,
|
||||
"font_size": 36,
|
||||
"position": "center",
|
||||
}
|
||||
},
|
||||
}
|
||||
175
backend/app/routers/jobs.py
Normal file
175
backend/app/routers/jobs.py
Normal file
@@ -0,0 +1,175 @@
|
||||
import os
|
||||
import shutil
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from fastapi.responses import FileResponse
|
||||
from app.models.schemas import JobInfo
|
||||
from app.models.job_store import job_store
|
||||
from app.config import settings
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/", response_model=list[JobInfo])
|
||||
async def list_jobs(limit: int = 50):
|
||||
"""List all jobs."""
|
||||
return job_store.list_jobs(limit=limit)
|
||||
|
||||
|
||||
@router.get("/{job_id}", response_model=JobInfo)
|
||||
async def get_job(job_id: str):
|
||||
"""Get job details."""
|
||||
job = job_store.get_job(job_id)
|
||||
if not job:
|
||||
raise HTTPException(status_code=404, detail="Job not found")
|
||||
print(f"[API GET] Job {job_id}: status={job.status}, progress={job.progress}")
|
||||
return job
|
||||
|
||||
|
||||
@router.delete("/{job_id}")
|
||||
async def delete_job(job_id: str):
|
||||
"""Delete a job and its files."""
|
||||
job = job_store.get_job(job_id)
|
||||
if not job:
|
||||
raise HTTPException(status_code=404, detail="Job not found")
|
||||
|
||||
# Delete associated files
|
||||
download_dir = os.path.join(settings.DOWNLOAD_DIR, job_id)
|
||||
processed_dir = os.path.join(settings.PROCESSED_DIR, job_id)
|
||||
|
||||
if os.path.exists(download_dir):
|
||||
shutil.rmtree(download_dir)
|
||||
if os.path.exists(processed_dir):
|
||||
shutil.rmtree(processed_dir)
|
||||
|
||||
job_store.delete_job(job_id)
|
||||
return {"message": f"Job {job_id} deleted"}
|
||||
|
||||
|
||||
@router.get("/{job_id}/download")
|
||||
async def download_output(job_id: str):
|
||||
"""Download the processed video."""
|
||||
job = job_store.get_job(job_id)
|
||||
|
||||
if not job:
|
||||
raise HTTPException(status_code=404, detail="Job not found")
|
||||
if not job.output_path or not os.path.exists(job.output_path):
|
||||
raise HTTPException(status_code=404, detail="Output file not found")
|
||||
|
||||
return FileResponse(
|
||||
path=job.output_path,
|
||||
media_type="video/mp4",
|
||||
filename=f"shorts_{job_id}.mp4"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{job_id}/original")
|
||||
async def download_original(job_id: str):
|
||||
"""Download the original video."""
|
||||
job = job_store.get_job(job_id)
|
||||
|
||||
if not job:
|
||||
raise HTTPException(status_code=404, detail="Job not found")
|
||||
if not job.video_path or not os.path.exists(job.video_path):
|
||||
raise HTTPException(status_code=404, detail="Original video not found")
|
||||
|
||||
filename = os.path.basename(job.video_path)
|
||||
# Disable caching to ensure trimmed video is always fetched fresh
|
||||
return FileResponse(
|
||||
path=job.video_path,
|
||||
media_type="video/mp4",
|
||||
filename=filename,
|
||||
headers={
|
||||
"Cache-Control": "no-cache, no-store, must-revalidate",
|
||||
"Pragma": "no-cache",
|
||||
"Expires": "0"
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{job_id}/subtitle")
|
||||
async def download_subtitle(job_id: str, format: str = "ass"):
|
||||
"""Download the subtitle file."""
|
||||
job = job_store.get_job(job_id)
|
||||
|
||||
if not job:
|
||||
raise HTTPException(status_code=404, detail="Job not found")
|
||||
if not job.video_path:
|
||||
raise HTTPException(status_code=404, detail="Video not found")
|
||||
|
||||
job_dir = os.path.dirname(job.video_path)
|
||||
subtitle_path = os.path.join(job_dir, f"subtitle.{format}")
|
||||
|
||||
if not os.path.exists(subtitle_path):
|
||||
# Try to generate from transcript
|
||||
if job.transcript:
|
||||
from app.services.transcriber import segments_to_ass, segments_to_srt
|
||||
|
||||
if format == "srt":
|
||||
content = segments_to_srt(job.transcript, use_translated=True)
|
||||
else:
|
||||
content = segments_to_ass(job.transcript, use_translated=True)
|
||||
|
||||
with open(subtitle_path, "w", encoding="utf-8") as f:
|
||||
f.write(content)
|
||||
else:
|
||||
raise HTTPException(status_code=404, detail="Subtitle not found")
|
||||
|
||||
return FileResponse(
|
||||
path=subtitle_path,
|
||||
media_type="text/plain",
|
||||
filename=f"subtitle_{job_id}.{format}"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{job_id}/thumbnail")
|
||||
async def download_thumbnail(job_id: str):
|
||||
"""Download the generated thumbnail image."""
|
||||
job = job_store.get_job(job_id)
|
||||
|
||||
if not job:
|
||||
raise HTTPException(status_code=404, detail="Job not found")
|
||||
|
||||
# Check for thumbnail in processed directory
|
||||
thumbnail_path = os.path.join(settings.PROCESSED_DIR, f"{job_id}_thumbnail.jpg")
|
||||
|
||||
if not os.path.exists(thumbnail_path):
|
||||
raise HTTPException(status_code=404, detail="Thumbnail not found. Generate it first using /process/{job_id}/thumbnail")
|
||||
|
||||
return FileResponse(
|
||||
path=thumbnail_path,
|
||||
media_type="image/jpeg",
|
||||
filename=f"thumbnail_{job_id}.jpg"
|
||||
)
|
||||
|
||||
|
||||
@router.post("/{job_id}/re-edit")
|
||||
async def re_edit_job(job_id: str):
|
||||
"""Reset job status to awaiting_review for re-editing."""
|
||||
from app.models.schemas import JobStatus
|
||||
|
||||
job = job_store.get_job(job_id)
|
||||
if not job:
|
||||
raise HTTPException(status_code=404, detail="Job not found")
|
||||
|
||||
if job.status != JobStatus.COMPLETED:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Only completed jobs can be re-edited"
|
||||
)
|
||||
|
||||
# Check if transcript exists for re-editing
|
||||
if not job.transcript:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="No transcript found. Cannot re-edit."
|
||||
)
|
||||
|
||||
# Reset status to awaiting_review
|
||||
job_store.update_job(
|
||||
job_id,
|
||||
status=JobStatus.AWAITING_REVIEW,
|
||||
progress=70,
|
||||
error=None
|
||||
)
|
||||
|
||||
return {"message": "Job ready for re-editing", "job_id": job_id}
|
||||
1057
backend/app/routers/process.py
Normal file
1057
backend/app/routers/process.py
Normal file
File diff suppressed because it is too large
Load Diff
15
backend/app/services/__init__.py
Normal file
15
backend/app/services/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from app.services.downloader import download_video, detect_platform, get_video_info
|
||||
from app.services.transcriber import transcribe_video, segments_to_srt, segments_to_ass
|
||||
from app.services.translator import (
|
||||
translate_segments,
|
||||
translate_single,
|
||||
generate_shorts_script,
|
||||
TranslationMode,
|
||||
)
|
||||
from app.services.video_processor import (
|
||||
process_video,
|
||||
get_video_duration,
|
||||
extract_audio,
|
||||
extract_audio_with_noise_reduction,
|
||||
analyze_audio_noise_level,
|
||||
)
|
||||
317
backend/app/services/audio_separator.py
Normal file
317
backend/app/services/audio_separator.py
Normal file
@@ -0,0 +1,317 @@
|
||||
"""
|
||||
Audio separation service using Demucs for vocal/music separation.
|
||||
Also includes speech vs singing detection.
|
||||
"""
|
||||
import subprocess
|
||||
import os
|
||||
import shutil
|
||||
from typing import Optional, Tuple
|
||||
from pathlib import Path
|
||||
|
||||
# Demucs runs in a separate Python 3.11 environment due to compatibility issues
|
||||
DEMUCS_VENV_PATH = os.path.join(
|
||||
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
|
||||
"venv_demucs"
|
||||
)
|
||||
DEMUCS_PYTHON = os.path.join(DEMUCS_VENV_PATH, "bin", "python")
|
||||
|
||||
|
||||
async def separate_vocals(
|
||||
input_path: str,
|
||||
output_dir: str,
|
||||
model: str = "htdemucs"
|
||||
) -> Tuple[bool, str, Optional[str], Optional[str]]:
|
||||
"""
|
||||
Separate vocals from background music using Demucs.
|
||||
|
||||
Args:
|
||||
input_path: Path to input audio/video file
|
||||
output_dir: Directory to save separated tracks
|
||||
model: Demucs model to use (htdemucs, htdemucs_ft, mdx_extra)
|
||||
|
||||
Returns:
|
||||
Tuple of (success, message, vocals_path, no_vocals_path)
|
||||
"""
|
||||
if not os.path.exists(input_path):
|
||||
return False, f"Input file not found: {input_path}", None, None
|
||||
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
# Check if Demucs venv exists
|
||||
if not os.path.exists(DEMUCS_PYTHON):
|
||||
return False, f"Demucs environment not found at {DEMUCS_VENV_PATH}. Run setup script.", None, None
|
||||
|
||||
# Run Demucs for two-stem separation (vocals vs accompaniment)
|
||||
cmd = [
|
||||
DEMUCS_PYTHON, "-m", "demucs",
|
||||
"--two-stems=vocals",
|
||||
"-n", model,
|
||||
"-o", output_dir,
|
||||
input_path
|
||||
]
|
||||
|
||||
try:
|
||||
print(f"Running Demucs separation: {' '.join(cmd)}")
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=600, # 10 minute timeout
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
error_msg = result.stderr[-500:] if result.stderr else "Unknown error"
|
||||
return False, f"Demucs error: {error_msg}", None, None
|
||||
|
||||
# Find output files
|
||||
# Demucs outputs to: output_dir/model_name/track_name/vocals.wav, no_vocals.wav
|
||||
input_name = Path(input_path).stem
|
||||
demucs_output = os.path.join(output_dir, model, input_name)
|
||||
|
||||
vocals_path = os.path.join(demucs_output, "vocals.wav")
|
||||
no_vocals_path = os.path.join(demucs_output, "no_vocals.wav")
|
||||
|
||||
if not os.path.exists(vocals_path):
|
||||
return False, "Vocals file not created", None, None
|
||||
|
||||
# Move files to simpler location
|
||||
final_vocals = os.path.join(output_dir, "vocals.wav")
|
||||
final_no_vocals = os.path.join(output_dir, "no_vocals.wav")
|
||||
|
||||
shutil.move(vocals_path, final_vocals)
|
||||
if os.path.exists(no_vocals_path):
|
||||
shutil.move(no_vocals_path, final_no_vocals)
|
||||
|
||||
# Clean up Demucs output directory
|
||||
shutil.rmtree(os.path.join(output_dir, model), ignore_errors=True)
|
||||
|
||||
return True, "Vocals separated successfully", final_vocals, final_no_vocals
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
return False, "Separation timed out", None, None
|
||||
except FileNotFoundError:
|
||||
return False, "Demucs not installed. Run: pip install demucs", None, None
|
||||
except Exception as e:
|
||||
return False, f"Separation error: {str(e)}", None, None
|
||||
|
||||
|
||||
async def analyze_vocal_type(
|
||||
vocals_path: str,
|
||||
speech_threshold: float = 0.7
|
||||
) -> Tuple[str, float]:
|
||||
"""
|
||||
Analyze if vocal track contains speech or singing.
|
||||
|
||||
Uses multiple heuristics:
|
||||
1. Speech has more silence gaps (pauses between words)
|
||||
2. Speech has more varied pitch changes
|
||||
3. Singing has more sustained notes
|
||||
|
||||
Args:
|
||||
vocals_path: Path to vocals audio file
|
||||
speech_threshold: Threshold for speech detection (0-1)
|
||||
|
||||
Returns:
|
||||
Tuple of (vocal_type, confidence)
|
||||
vocal_type: "speech", "singing", or "mixed"
|
||||
"""
|
||||
if not os.path.exists(vocals_path):
|
||||
return "unknown", 0.0
|
||||
|
||||
# Analyze silence ratio using FFmpeg
|
||||
# Speech typically has 30-50% silence, singing has less
|
||||
silence_ratio = await _get_silence_ratio(vocals_path)
|
||||
|
||||
# Analyze zero-crossing rate (speech has higher ZCR variance)
|
||||
zcr_variance = await _get_zcr_variance(vocals_path)
|
||||
|
||||
# Analyze spectral flatness (speech has higher flatness)
|
||||
spectral_score = await _get_spectral_analysis(vocals_path)
|
||||
|
||||
# Combine scores
|
||||
speech_score = 0.0
|
||||
|
||||
# High silence ratio indicates speech (pauses between sentences)
|
||||
if silence_ratio > 0.25:
|
||||
speech_score += 0.4
|
||||
elif silence_ratio > 0.15:
|
||||
speech_score += 0.2
|
||||
|
||||
# High spectral variance indicates speech
|
||||
if spectral_score > 0.5:
|
||||
speech_score += 0.3
|
||||
elif spectral_score > 0.3:
|
||||
speech_score += 0.15
|
||||
|
||||
# ZCR variance
|
||||
if zcr_variance > 0.5:
|
||||
speech_score += 0.3
|
||||
elif zcr_variance > 0.3:
|
||||
speech_score += 0.15
|
||||
|
||||
# Determine type
|
||||
# speech_threshold=0.7: High confidence speech
|
||||
# singing_threshold=0.4: Below this is likely singing (music)
|
||||
# Between 0.4-0.7: Mixed or uncertain
|
||||
if speech_score >= speech_threshold:
|
||||
return "speech", speech_score
|
||||
elif speech_score < 0.4:
|
||||
return "singing", 1.0 - speech_score
|
||||
else:
|
||||
# For mixed, lean towards singing if score is closer to lower bound
|
||||
# This helps avoid transcribing song lyrics as speech
|
||||
return "mixed", speech_score
|
||||
|
||||
|
||||
async def _get_silence_ratio(audio_path: str, threshold_db: float = -35) -> float:
|
||||
"""Get ratio of silence in audio file."""
|
||||
cmd = [
|
||||
"ffmpeg", "-i", audio_path,
|
||||
"-af", f"silencedetect=noise={threshold_db}dB:d=0.3",
|
||||
"-f", "null", "-"
|
||||
]
|
||||
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
|
||||
stderr = result.stderr
|
||||
|
||||
# Count silence periods
|
||||
silence_count = stderr.count("silence_end")
|
||||
|
||||
# Get total duration
|
||||
duration = await _get_audio_duration(audio_path)
|
||||
if not duration or duration == 0:
|
||||
return 0.0
|
||||
|
||||
# Parse total silence duration
|
||||
total_silence = 0.0
|
||||
lines = stderr.split('\n')
|
||||
for line in lines:
|
||||
if 'silence_duration' in line:
|
||||
try:
|
||||
dur = float(line.split('silence_duration:')[1].strip().split()[0])
|
||||
total_silence += dur
|
||||
except (IndexError, ValueError):
|
||||
pass
|
||||
|
||||
return min(total_silence / duration, 1.0)
|
||||
|
||||
except Exception:
|
||||
return 0.0
|
||||
|
||||
|
||||
async def _get_zcr_variance(audio_path: str) -> float:
|
||||
"""Get zero-crossing rate variance (simplified estimation)."""
|
||||
# Use FFmpeg to analyze audio stats
|
||||
cmd = [
|
||||
"ffmpeg", "-i", audio_path,
|
||||
"-af", "astats=metadata=1:reset=1",
|
||||
"-f", "null", "-"
|
||||
]
|
||||
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
|
||||
stderr = result.stderr
|
||||
|
||||
# Look for RMS level variations as proxy for ZCR variance
|
||||
rms_values = []
|
||||
for line in stderr.split('\n'):
|
||||
if 'RMS_level' in line:
|
||||
try:
|
||||
val = float(line.split(':')[1].strip().split()[0])
|
||||
if val != float('-inf'):
|
||||
rms_values.append(val)
|
||||
except (IndexError, ValueError):
|
||||
pass
|
||||
|
||||
if len(rms_values) > 1:
|
||||
mean_rms = sum(rms_values) / len(rms_values)
|
||||
variance = sum((x - mean_rms) ** 2 for x in rms_values) / len(rms_values)
|
||||
# Normalize to 0-1 range
|
||||
return min(variance / 100, 1.0)
|
||||
|
||||
return 0.3 # Default moderate value
|
||||
|
||||
except Exception:
|
||||
return 0.3
|
||||
|
||||
|
||||
async def _get_spectral_analysis(audio_path: str) -> float:
|
||||
"""Analyze spectral characteristics (speech has more flat spectrum)."""
|
||||
# Use volume detect as proxy for spectral analysis
|
||||
cmd = [
|
||||
"ffmpeg", "-i", audio_path,
|
||||
"-af", "volumedetect",
|
||||
"-f", "null", "-"
|
||||
]
|
||||
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
|
||||
stderr = result.stderr
|
||||
|
||||
mean_vol = None
|
||||
max_vol = None
|
||||
|
||||
for line in stderr.split('\n'):
|
||||
if 'mean_volume' in line:
|
||||
try:
|
||||
mean_vol = float(line.split(':')[1].strip().replace(' dB', ''))
|
||||
except (IndexError, ValueError):
|
||||
pass
|
||||
elif 'max_volume' in line:
|
||||
try:
|
||||
max_vol = float(line.split(':')[1].strip().replace(' dB', ''))
|
||||
except (IndexError, ValueError):
|
||||
pass
|
||||
|
||||
if mean_vol is not None and max_vol is not None:
|
||||
# Large difference between mean and max indicates speech dynamics
|
||||
diff = abs(max_vol - mean_vol)
|
||||
# Speech typically has 15-25dB dynamic range
|
||||
if diff > 20:
|
||||
return 0.7
|
||||
elif diff > 12:
|
||||
return 0.5
|
||||
else:
|
||||
return 0.2
|
||||
|
||||
return 0.3
|
||||
|
||||
except Exception:
|
||||
return 0.3
|
||||
|
||||
|
||||
async def _get_audio_duration(audio_path: str) -> Optional[float]:
|
||||
"""Get audio duration in seconds."""
|
||||
cmd = [
|
||||
"ffprobe",
|
||||
"-v", "error",
|
||||
"-show_entries", "format=duration",
|
||||
"-of", "default=noprint_wrappers=1:nokey=1",
|
||||
audio_path
|
||||
]
|
||||
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
|
||||
if result.returncode == 0:
|
||||
return float(result.stdout.strip())
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
|
||||
async def check_demucs_available() -> bool:
|
||||
"""Check if Demucs is installed in the dedicated environment."""
|
||||
if not os.path.exists(DEMUCS_PYTHON):
|
||||
return False
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[DEMUCS_PYTHON, "-m", "demucs", "--help"],
|
||||
capture_output=True,
|
||||
timeout=10
|
||||
)
|
||||
return result.returncode == 0
|
||||
except Exception:
|
||||
return False
|
||||
495
backend/app/services/bgm_provider.py
Normal file
495
backend/app/services/bgm_provider.py
Normal file
@@ -0,0 +1,495 @@
|
||||
"""
|
||||
BGM Provider Service - Freesound & Pixabay Integration
|
||||
|
||||
Freesound API: https://freesound.org/docs/api/
|
||||
- 500,000+ Creative Commons licensed sounds
|
||||
- Free API with generous rate limits
|
||||
- Various licenses (CC0, CC-BY, CC-BY-NC, etc.)
|
||||
|
||||
Pixabay: Manual download recommended (no public Music API)
|
||||
"""
|
||||
|
||||
import os
|
||||
import httpx
|
||||
import aiofiles
|
||||
from typing import Optional, List, Tuple
|
||||
from pydantic import BaseModel
|
||||
from app.config import settings
|
||||
|
||||
|
||||
class FreesoundTrack(BaseModel):
    """A single sound as returned by the Freesound API."""
    id: int  # Freesound sound id
    name: str  # sound title on Freesound
    duration: float  # seconds
    tags: List[str]  # uploader-supplied tags
    license: str  # license name string from the API
    username: str  # uploader's Freesound username
    preview_url: str  # HQ preview (128kbps mp3)
    download_url: str  # Original file (requires auth)
    description: str = ""  # free-text description, may be empty
|
||||
|
||||
|
||||
class BGMSearchResult(BaseModel):
    """Normalized BGM search result exposed by this service."""
    id: str  # provider-specific id, stringified
    title: str  # track title
    duration: int  # duration (seconds)
    tags: List[str]  # associated tags
    preview_url: str  # streamable preview URL
    download_url: str = ""  # direct download URL, may be empty
    license: str = ""  # license name, may be empty
    source: str = "freesound"  # originating provider
|
||||
|
||||
|
||||
# Freesound license filters for commercial use
# CC0 and CC-BY are commercially usable, CC-BY-NC is NOT
# NOTE(review): despite the list's name, the last entry below is the
# NONcommercial license — it seems to be listed only to document what must
# be excluded. Confirm nothing iterates this list treating every entry as
# commercial-OK.
COMMERCIAL_LICENSES = [
    "Creative Commons 0",  # CC0 - Public Domain
    "Attribution",  # CC-BY - Attribution required
    "Attribution Noncommercial",  # Exclude this (NOT commercial)
]

# License filter string for commercial-only search (Freesound filter syntax)
COMMERCIAL_LICENSE_FILTER = 'license:"Creative Commons 0" OR license:"Attribution"'
|
||||
|
||||
|
||||
async def search_freesound(
    query: str,
    min_duration: int = 10,
    max_duration: int = 180,  # Shorts are typically < 60s; allow some buffer
    page: int = 1,
    page_size: int = 15,
    filter_music: bool = True,
    commercial_only: bool = True,  # Default: only commercially usable licenses
) -> Tuple[bool, str, List[BGMSearchResult]]:
    """
    Search for sounds on the Freesound API.

    Args:
        query: Search keywords (e.g., "upbeat music", "chill background")
        min_duration: Minimum duration in seconds
        max_duration: Maximum duration in seconds
        page: Page number (1-indexed)
        page_size: Results per page (max 150, clamped below)
        filter_music: Append "music" to the query for better BGM results
        commercial_only: Only return commercially usable licenses (CC0, CC-BY)

    Returns:
        Tuple of (success, message, results)
    """
    api_key = settings.FREESOUND_API_KEY
    if not api_key:
        return False, "Freesound API key not configured. Get one at https://freesound.org/apiv2/apply", []

    # Add "music" to the query for better BGM results
    search_query = f"{query} music" if filter_music and "music" not in query.lower() else query

    # Build the Freesound "filter" expression (space-separated terms are ANDed)
    filter_parts = [f"duration:[{min_duration} TO {max_duration}]"]

    if commercial_only:
        # BUG FIX: previously only CC0 was added to the filter even though the
        # docstring and COMMERCIAL_LICENSES declare CC-BY as commercial-safe
        # too (COMMERCIAL_LICENSE_FILTER was defined but never used).  Use the
        # shared OR-filter so both CC0 and CC-BY results come back.
        filter_parts.append(f"({COMMERCIAL_LICENSE_FILTER})")

    filter_str = " ".join(filter_parts)

    params = {
        "token": api_key,
        "query": search_query,
        "filter": filter_str,
        "page": page,
        "page_size": min(page_size, 150),  # 150 is the API hard limit
        "fields": "id,name,duration,tags,license,username,previews,description",
        "sort": "score",  # relevance
    }

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                "https://freesound.org/apiv2/search/text/",
                params=params,
                timeout=30,
            )

            if response.status_code == 401:
                return False, "Invalid Freesound API key", []

            if response.status_code != 200:
                return False, f"Freesound API error: HTTP {response.status_code}", []

            data = response.json()
            results = []

            for sound in data.get("results", []):
                # Prefer the high-quality preview stream
                previews = sound.get("previews", {})
                preview_url = (
                    previews.get("preview-hq-mp3") or
                    previews.get("preview-lq-mp3") or
                    ""
                )

                # Human-readable license label for display
                license_name = _parse_freesound_license(sound.get("license", ""))

                results.append(BGMSearchResult(
                    id=str(sound["id"]),
                    title=sound.get("name", "Unknown"),
                    duration=int(sound.get("duration", 0)),
                    tags=sound.get("tags", [])[:10],  # Limit tags
                    preview_url=preview_url,
                    download_url=f"https://freesound.org/apiv2/sounds/{sound['id']}/download/",
                    license=license_name,
                    source="freesound",
                ))

            total = data.get("count", 0)
            license_info = " (commercial use OK)" if commercial_only else ""
            message = f"Found {total} sounds on Freesound{license_info}"

            return True, message, results

    except httpx.TimeoutException:
        return False, "Freesound API timeout", []
    except Exception as e:
        return False, f"Freesound search error: {str(e)}", []
|
||||
|
||||
|
||||
def _parse_freesound_license(license_url: str) -> str:
|
||||
"""Parse Freesound license URL to human-readable name."""
|
||||
if "zero" in license_url or "cc0" in license_url.lower():
|
||||
return "CC0 (Public Domain)"
|
||||
elif "by-nc" in license_url:
|
||||
return "CC BY-NC (Non-Commercial)"
|
||||
elif "by-sa" in license_url:
|
||||
return "CC BY-SA (Share Alike)"
|
||||
elif "by/" in license_url:
|
||||
return "CC BY (Attribution)"
|
||||
elif "sampling+" in license_url:
|
||||
return "Sampling+"
|
||||
else:
|
||||
return "See License"
|
||||
|
||||
|
||||
async def download_freesound(
    sound_id: str,
    output_dir: str,
    filename: str,
) -> Tuple[bool, str, Optional[str]]:
    """
    Download a sound from Freesound.

    Note: Freesound requires OAuth for original file downloads.
    This function downloads the HQ preview (128kbps MP3) which is sufficient for BGM.

    Args:
        sound_id: Freesound sound ID
        output_dir: Directory to save file
        filename: Output filename (without extension)

    Returns:
        Tuple of (success, message, file_path)
    """
    api_key = settings.FREESOUND_API_KEY
    if not api_key:
        return False, "Freesound API key not configured", None

    try:
        async with httpx.AsyncClient() as client:
            # First, get sound info to resolve the preview URL
            info_response = await client.get(
                f"https://freesound.org/apiv2/sounds/{sound_id}/",
                params={
                    "token": api_key,
                    "fields": "id,name,previews,license,username",
                },
                timeout=30,
            )

            if info_response.status_code != 200:
                return False, f"Failed to get sound info: HTTP {info_response.status_code}", None

            sound_data = info_response.json()
            previews = sound_data.get("previews", {})

            # Prefer the high-quality preview, fall back to low quality
            preview_url = previews.get("preview-hq-mp3") or previews.get("preview-lq-mp3")
            if not preview_url:
                return False, "No preview URL available", None

            # Download the preview audio
            audio_response = await client.get(preview_url, timeout=60, follow_redirects=True)

            if audio_response.status_code != 200:
                return False, f"Download failed: HTTP {audio_response.status_code}", None

            # Save under the caller-provided name.
            # BUG FIX: previously wrote a literal "(unknown).mp3", ignoring the
            # `filename` parameter, so every download clobbered the same file.
            os.makedirs(output_dir, exist_ok=True)
            file_path = os.path.join(output_dir, f"{filename}.mp3")

            async with aiofiles.open(file_path, 'wb') as f:
                await f.write(audio_response.content)

            # Attribution info (needed for CC-BY licenses)
            username = sound_data.get("username", "Unknown")
            license_name = _parse_freesound_license(sound_data.get("license", ""))

            return True, f"Downloaded from Freesound (by {username}, {license_name})", file_path

    except httpx.TimeoutException:
        return False, "Download timeout", None
    except Exception as e:
        return False, f"Download error: {str(e)}", None
|
||||
|
||||
|
||||
async def search_and_download_bgm(
    keywords: List[str],
    output_dir: str,
    max_duration: int = 120,
    commercial_only: bool = True,
) -> Tuple[bool, str, Optional[str], Optional[BGMSearchResult]]:
    """
    Search Freesound for a BGM matching *keywords* and download the top hit.

    Args:
        keywords: Search keywords from the BGM recommendation
        output_dir: Directory to save the downloaded file
        max_duration: Maximum duration in seconds
        commercial_only: Only search commercially usable licenses

    Returns:
        Tuple of (success, message, file_path, matched_result)
    """
    if not settings.FREESOUND_API_KEY:
        return False, "Freesound API key not configured", None, None

    # First attempt: up to three keywords combined into a single query.
    success, message, results = await search_freesound(
        query=" ".join(keywords[:3]),
        min_duration=15,
        max_duration=max_duration,
        page_size=10,
        commercial_only=commercial_only,
    )

    # Fallback: retry each keyword on its own until something matches.
    if not (success and results):
        for single_keyword in keywords[:3]:
            success, message, results = await search_freesound(
                query=single_keyword,
                min_duration=15,
                max_duration=max_duration,
                page_size=5,
                commercial_only=commercial_only,
            )
            if success and results:
                break

    if not results:
        return False, "No matching BGM found on Freesound", None, None

    # Results are relevance-sorted, so the first entry is the best match.
    best_match = results[0]

    # Build a filesystem-safe filename from the track title.
    cleaned_name = best_match.title.lower().replace(" ", "_")[:50]
    cleaned_name = "".join(ch for ch in cleaned_name if ch.isalnum() or ch == "_")

    ok, download_msg, file_path = await download_freesound(
        sound_id=best_match.id,
        output_dir=output_dir,
        filename=cleaned_name,
    )

    if not ok:
        return False, download_msg, None, best_match

    return True, download_msg, file_path, best_match
|
||||
|
||||
|
||||
async def search_pixabay_music(
    query: str = "",
    category: str = "",
    min_duration: int = 0,
    max_duration: int = 120,
    page: int = 1,
    per_page: int = 20,
) -> Tuple[bool, str, List[BGMSearchResult]]:
    """
    Search for royalty-free music on Pixabay.

    Pixabay exposes no public Music API, so every parameter except *query*
    is ignored and a curated recommendation list is returned instead.
    """
    return await _get_curated_bgm_list(query)
|
||||
|
||||
|
||||
async def _get_curated_bgm_list(query: str = "") -> Tuple[bool, str, List[BGMSearchResult]]:
    """
    Return a curated, static list of recommended BGM categories.

    Pixabay's Music API requires special access, so these entries are
    hand-picked category suggestions rather than actual downloadable files.
    """
    catalog = [
        {
            "id": "upbeat_energetic",
            "title": "Upbeat & Energetic",
            "duration": 60,
            "tags": ["upbeat", "energetic", "happy", "positive"],
            "description": "활기찬 쇼츠에 적합",
        },
        {
            "id": "chill_lofi",
            "title": "Chill Lo-Fi",
            "duration": 60,
            "tags": ["chill", "lofi", "relaxing", "calm"],
            "description": "편안한 분위기의 콘텐츠",
        },
        {
            "id": "epic_cinematic",
            "title": "Epic & Cinematic",
            "duration": 60,
            "tags": ["epic", "cinematic", "dramatic", "intense"],
            "description": "드라마틱한 순간",
        },
        {
            "id": "funny_quirky",
            "title": "Funny & Quirky",
            "duration": 30,
            "tags": ["funny", "quirky", "comedy", "playful"],
            "description": "유머러스한 콘텐츠",
        },
        {
            "id": "corporate_tech",
            "title": "Corporate & Tech",
            "duration": 60,
            "tags": ["corporate", "tech", "modern", "professional"],
            "description": "정보성 콘텐츠",
        },
    ]

    # Narrow the catalog by the query; keep everything when nothing matches.
    if query:
        needle = query.lower()
        matches = [
            entry for entry in catalog
            if needle in entry["title"].lower()
            or any(needle in tag for tag in entry["tags"])
        ]
        if matches:
            catalog = matches

    return True, "Curated BGM list", [
        BGMSearchResult(
            id=entry["id"],
            title=entry["title"],
            duration=entry["duration"],
            tags=entry["tags"],
            preview_url="",  # curated entries carry no real audio URL
            source="curated",
        )
        for entry in catalog
    ]
|
||||
|
||||
|
||||
async def download_from_url(
    url: str,
    output_path: str,
    filename: str,
) -> Tuple[bool, str, Optional[str]]:
    """
    Download an audio file from a direct URL.

    Args:
        url: Audio file URL
        output_path: Directory to save file
        filename: Output filename (without extension)

    Returns:
        Tuple of (success, message, file_path)
    """
    try:
        os.makedirs(output_path, exist_ok=True)

        async with httpx.AsyncClient() as client:
            response = await client.get(url, timeout=60, follow_redirects=True)

            if response.status_code != 200:
                return False, f"Download failed: HTTP {response.status_code}", None

            # Pick a file extension from the response content-type
            content_type = response.headers.get("content-type", "")
            if "mpeg" in content_type:
                ext = ".mp3"
            elif "wav" in content_type:
                ext = ".wav"
            elif "ogg" in content_type:
                ext = ".ogg"
            else:
                ext = ".mp3"  # Default to mp3

            # BUG FIX: previously wrote a literal "(unknown)<ext>" name,
            # ignoring the `filename` parameter, so concurrent downloads
            # overwrote each other.
            file_path = os.path.join(output_path, f"{filename}{ext}")

            with open(file_path, "wb") as f:
                f.write(response.content)

            return True, "Download complete", file_path

    except Exception as e:
        return False, f"Download error: {str(e)}", None
|
||||
|
||||
|
||||
# Popular free BGM download links.
# Static registry served by get_free_bgm_sources(); "api_available" marks
# whether the source exposes a programmatic API (only Freesound does).
FREE_BGM_SOURCES = {
    "freesound": {
        "name": "Freesound",
        "url": "https://freesound.org/",
        "license": "CC0/CC-BY/CC-BY-NC (Various)",
        "description": "500,000+ CC licensed sounds, API available",
        "api_available": True,
        "api_url": "https://freesound.org/apiv2/apply",
    },
    "pixabay": {
        "name": "Pixabay Music",
        "url": "https://pixabay.com/music/",
        "license": "Pixabay License (Free for commercial use)",
        "description": "Large collection of royalty-free music",
        "api_available": False,
    },
    "mixkit": {
        "name": "Mixkit",
        "url": "https://mixkit.co/free-stock-music/",
        "license": "Mixkit License (Free for commercial use)",
        "description": "High-quality free music tracks",
        "api_available": False,
    },
    "uppbeat": {
        "name": "Uppbeat",
        "url": "https://uppbeat.io/",
        "license": "Free tier: 10 tracks/month",
        "description": "YouTube-friendly music",
        "api_available": False,
    },
    "youtube_audio_library": {
        "name": "YouTube Audio Library",
        "url": "https://studio.youtube.com/channel/UC/music",
        "license": "Free for YouTube videos",
        "description": "Google's free music library",
        "api_available": False,
    },
}
|
||||
|
||||
|
||||
def get_free_bgm_sources() -> dict:
    """Return the static registry of recommended free BGM sources."""
    return FREE_BGM_SOURCES
|
||||
295
backend/app/services/bgm_recommender.py
Normal file
295
backend/app/services/bgm_recommender.py
Normal file
@@ -0,0 +1,295 @@
|
||||
"""
|
||||
BGM Recommender Service
|
||||
|
||||
Analyzes script content and recommends appropriate BGM based on mood/tone.
|
||||
Uses GPT to analyze the emotional tone and suggests matching music.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import List, Tuple, Optional
|
||||
from openai import OpenAI
|
||||
from pydantic import BaseModel
|
||||
from app.config import settings
|
||||
from app.models.schemas import TranscriptSegment
|
||||
|
||||
|
||||
class BGMRecommendation(BaseModel):
    """BGM recommendation produced by mood analysis (or a content-type preset)."""
    mood: str  # detected mood (key into MOOD_BGM_MAPPING)
    energy: str  # low, medium, high
    suggested_genres: List[str]
    search_keywords: List[str]  # keywords for searching external BGM providers
    reasoning: str  # short human-readable explanation of the choice
    matched_bgm_id: Optional[str] = None  # if found in local library
|
||||
|
||||
|
||||
# Mood to BGM mapping.
# Each mood maps to suggested genres, search keywords (for providers such as
# Freesound), and a coarse energy level used by BGMRecommendation.
MOOD_BGM_MAPPING = {
    "upbeat": {
        "genres": ["pop", "electronic", "dance"],
        "keywords": ["upbeat", "energetic", "happy", "positive"],
        "energy": "high",
    },
    "chill": {
        "genres": ["lofi", "ambient", "acoustic"],
        "keywords": ["chill", "relaxing", "calm", "peaceful"],
        "energy": "low",
    },
    "dramatic": {
        "genres": ["cinematic", "orchestral", "epic"],
        "keywords": ["dramatic", "epic", "intense", "cinematic"],
        "energy": "high",
    },
    "funny": {
        "genres": ["comedy", "quirky", "playful"],
        "keywords": ["funny", "quirky", "comedy", "playful"],
        "energy": "medium",
    },
    "emotional": {
        "genres": ["piano", "strings", "ballad"],
        "keywords": ["emotional", "sad", "touching", "heartfelt"],
        "energy": "low",
    },
    "informative": {
        "genres": ["corporate", "background", "minimal"],
        "keywords": ["corporate", "background", "tech", "modern"],
        "energy": "medium",
    },
    "exciting": {
        "genres": ["rock", "action", "sports"],
        "keywords": ["exciting", "action", "sports", "adventure"],
        "energy": "high",
    },
    "mysterious": {
        "genres": ["ambient", "dark", "suspense"],
        "keywords": ["mysterious", "suspense", "dark", "tension"],
        "energy": "medium",
    },
}
|
||||
|
||||
|
||||
async def analyze_script_mood(
    segments: List[TranscriptSegment],
    use_translated: bool = True,
) -> Tuple[bool, str, Optional[BGMRecommendation]]:
    """
    Analyze script content with GPT to determine mood and recommend BGM.

    Args:
        segments: Transcript segments (original or translated)
        use_translated: Prefer each segment's translated text when present

    Returns:
        Tuple of (success, message, recommendation)
    """
    # BUG FIX: `import json` used to live inside the `try` AFTER the OpenAI
    # call.  If the API call raised first, evaluating the
    # `except json.JSONDecodeError` clause raised NameError (json unbound)
    # instead of reaching `except Exception`.  Import it up front.
    import json

    if not settings.OPENAI_API_KEY:
        return False, "OpenAI API key not configured", None

    if not segments:
        return False, "No transcript segments provided", None

    # Combine script text (prefer translated text when requested and present)
    script_text = "\n".join([
        seg.translated if use_translated and seg.translated else seg.text
        for seg in segments
    ])

    try:
        client = OpenAI(api_key=settings.OPENAI_API_KEY)

        response = client.chat.completions.create(
            model=settings.OPENAI_MODEL,
            messages=[
                {
                    "role": "system",
                    "content": """You are a music supervisor for YouTube Shorts.
Analyze the script and determine the best background music mood.

Respond in JSON format ONLY:
{
"mood": "one of: upbeat, chill, dramatic, funny, emotional, informative, exciting, mysterious",
"energy": "low, medium, or high",
"reasoning": "brief explanation in Korean (1 sentence)"
}

Consider:
- Overall emotional tone of the content
- Pacing and energy level
- Target audience engagement
- What would make viewers watch till the end"""
                },
                {
                    "role": "user",
                    "content": f"Script:\n{script_text}"
                }
            ],
            temperature=0.3,  # low temperature: we want a stable classification
            max_tokens=200,
        )

        result_text = response.choices[0].message.content.strip()

        # Strip a ```json ... ``` markdown fence if the model added one
        if result_text.startswith("```"):
            result_text = result_text.split("```")[1]
            if result_text.startswith("json"):
                result_text = result_text[4:]

        result = json.loads(result_text)

        mood = result.get("mood", "upbeat")
        energy = result.get("energy", "medium")
        reasoning = result.get("reasoning", "")

        # Fall back to "upbeat" suggestions for any unexpected mood label
        mood_info = MOOD_BGM_MAPPING.get(mood, MOOD_BGM_MAPPING["upbeat"])

        recommendation = BGMRecommendation(
            mood=mood,
            energy=energy,
            suggested_genres=mood_info["genres"],
            search_keywords=mood_info["keywords"],
            reasoning=reasoning,
        )

        return True, f"Mood analysis complete: {mood}", recommendation

    except json.JSONDecodeError as e:
        return False, f"Failed to parse mood analysis: {str(e)}", None
    except Exception as e:
        return False, f"Mood analysis error: {str(e)}", None
|
||||
|
||||
|
||||
async def find_matching_bgm(
    recommendation: BGMRecommendation,
    available_bgm: List[dict],
) -> Optional[str]:
    """
    Pick the library BGM whose name/id overlaps the most recommendation keywords.

    Args:
        recommendation: BGM recommendation from mood analysis
        available_bgm: Available BGM info dicts with 'id' and 'name'

    Returns:
        The best-scoring BGM id, or None when the library is empty or
        nothing matches at all.
    """
    if not available_bgm:
        return None

    search_terms = recommendation.search_keywords + [recommendation.mood]

    best_id = None
    top_score = 0

    # Count how many keywords appear in each entry's name or id; highest wins.
    for entry in available_bgm:
        haystack_name = entry.get("name", "").lower()
        haystack_id = entry.get("id", "").lower()

        hits = sum(
            1 for term in search_terms
            if term.lower() in haystack_name or term.lower() in haystack_id
        )

        if hits > top_score:
            top_score = hits
            best_id = entry.get("id")

    return best_id
|
||||
|
||||
|
||||
async def recommend_bgm_for_script(
    segments: List[TranscriptSegment],
    available_bgm: List[dict],
    use_translated: bool = True,
) -> Tuple[bool, str, Optional[BGMRecommendation]]:
    """
    End-to-end BGM recommendation workflow.

    Runs mood analysis on the script, then tries to match a track from the
    local library; when nothing matches, the message carries search keywords
    for external providers instead.

    Args:
        segments: Transcript segments
        available_bgm: List of available BGM in the local library
        use_translated: Prefer translated text when present

    Returns:
        Tuple of (success, message, recommendation with matched_bgm_id if found)
    """
    # Step 1: mood analysis via GPT
    ok, message, recommendation = await analyze_script_mood(segments, use_translated)
    if not ok or not recommendation:
        return ok, message, recommendation

    # Step 2: try the local library first
    matched_id = await find_matching_bgm(recommendation, available_bgm)

    if matched_id:
        recommendation.matched_bgm_id = matched_id
        message = f"Mood: {recommendation.mood} | Matched BGM: {matched_id}"
    else:
        keywords_hint = ', '.join(recommendation.search_keywords[:3])
        message = f"Mood: {recommendation.mood} | No local BGM matched, search with: {keywords_hint}"

    return True, message, recommendation
|
||||
|
||||
|
||||
# Predefined BGM presets for common content types.
# Each preset names a mood (key into MOOD_BGM_MAPPING) plus search keywords;
# get_preset_recommendation() expands them into a full BGMRecommendation.
BGM_PRESETS = {
    "cooking": {
        "mood": "chill",
        "keywords": ["cooking", "food", "kitchen", "cozy"],
    },
    "fitness": {
        "mood": "upbeat",
        "keywords": ["workout", "fitness", "energetic", "motivation"],
    },
    "tutorial": {
        "mood": "informative",
        "keywords": ["tutorial", "tech", "corporate", "background"],
    },
    "comedy": {
        "mood": "funny",
        "keywords": ["funny", "comedy", "quirky", "playful"],
    },
    "travel": {
        "mood": "exciting",
        "keywords": ["travel", "adventure", "upbeat", "inspiring"],
    },
    "asmr": {
        "mood": "chill",
        "keywords": ["asmr", "relaxing", "ambient", "soft"],
    },
    "news": {
        "mood": "informative",
        "keywords": ["news", "corporate", "serious", "background"],
    },
    "gaming": {
        "mood": "exciting",
        "keywords": ["gaming", "electronic", "action", "intense"],
    },
}
|
||||
|
||||
|
||||
def get_preset_recommendation(content_type: str) -> Optional[BGMRecommendation]:
    """Build a BGM recommendation from a predefined content-type preset.

    Returns None when *content_type* has no preset.
    """
    preset = BGM_PRESETS.get(content_type.lower())
    if not preset:
        return None

    preset_mood = preset["mood"]
    mood_info = MOOD_BGM_MAPPING.get(preset_mood, MOOD_BGM_MAPPING["upbeat"])

    return BGMRecommendation(
        mood=preset_mood,
        energy=mood_info["energy"],
        suggested_genres=mood_info["genres"],
        search_keywords=preset["keywords"],
        reasoning=f"Preset for {content_type} content",
    )
|
||||
297
backend/app/services/default_bgm.py
Normal file
297
backend/app/services/default_bgm.py
Normal file
@@ -0,0 +1,297 @@
|
||||
"""
|
||||
Default BGM Initializer
|
||||
|
||||
Downloads pre-selected royalty-free BGM tracks on first startup.
|
||||
Tracks are from Kevin MacLeod (incompetech.com) - CC-BY 4.0 License.
|
||||
Free for commercial use with attribution: "Kevin MacLeod (incompetech.com)"
|
||||
"""
|
||||
|
||||
import os
|
||||
import httpx
|
||||
import aiofiles
|
||||
import asyncio
|
||||
from typing import List, Tuple, Optional
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class DefaultBGM(BaseModel):
    """Metadata for one pre-selected default BGM track."""
    id: str  # also used as the on-disk filename stem ("<id>.mp3")
    name: str
    url: str  # direct download URL
    category: str
    description: str
|
||||
|
||||
|
||||
# Curated list of royalty-free BGM from Kevin MacLeod (incompetech.com).
# CC-BY 4.0 License - Free for commercial use with attribution.
# Attribution: "Kevin MacLeod (incompetech.com)"
DEFAULT_BGM_TRACKS: List[DefaultBGM] = [
    # === Upbeat / Energetic ===
    DefaultBGM(
        id="upbeat_energetic",
        name="Upbeat Energetic",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Vivacity.mp3",
        category="upbeat",
        description="활기차고 에너지 넘치는 BGM - 피트니스, 챌린지 영상",
    ),
    DefaultBGM(
        id="happy_pop",
        name="Happy Pop",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Carefree.mp3",
        category="upbeat",
        description="밝고 경쾌한 팝 BGM - 제품 소개, 언박싱",
    ),
    DefaultBGM(
        id="upbeat_fun",
        name="Upbeat Fun",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Happy%20Happy%20Game%20Show.mp3",
        category="upbeat",
        description="신나는 게임쇼 비트 - 트렌디한 쇼츠",
    ),

    # === Chill / Relaxing ===
    DefaultBGM(
        id="chill_lofi",
        name="Chill Lo-Fi",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Gymnopedie%20No%201.mp3",
        category="chill",
        description="차분하고 편안한 피아노 BGM - 일상, 브이로그",
    ),
    DefaultBGM(
        id="calm_piano",
        name="Calm Piano",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Prelude%20No.%201.mp3",
        category="chill",
        description="잔잔한 피아노 BGM - 감성적인 콘텐츠",
    ),
    DefaultBGM(
        id="soft_ambient",
        name="Soft Ambient",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Dreamlike.mp3",
        category="chill",
        description="부드러운 앰비언트 - ASMR, 수면 콘텐츠",
    ),

    # === Funny / Comedy ===
    DefaultBGM(
        id="funny_comedy",
        name="Funny Comedy",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Sneaky%20Snitch.mp3",
        category="funny",
        description="유쾌한 코미디 BGM - 코미디, 밈 영상",
    ),
    DefaultBGM(
        id="quirky_playful",
        name="Quirky Playful",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Monkeys%20Spinning%20Monkeys.mp3",
        category="funny",
        description="장난스럽고 귀여운 BGM - 펫, 키즈 콘텐츠",
    ),

    # === Dramatic / Cinematic ===
    DefaultBGM(
        id="cinematic_epic",
        name="Cinematic Epic",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Epic%20Unease.mp3",
        category="cinematic",
        description="웅장한 시네마틱 BGM - 리뷰, 소개 영상",
    ),
    DefaultBGM(
        id="inspirational",
        name="Inspirational",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Hero%20Theme.mp3",
        category="cinematic",
        description="영감을 주는 BGM - 동기부여, 성장 콘텐츠",
    ),

    # === Lifestyle / Product review ===
    DefaultBGM(
        id="lifestyle_modern",
        name="Lifestyle Modern",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Acoustic%20Breeze.mp3",
        category="lifestyle",
        description="모던한 라이프스타일 BGM - 제품 리뷰",
    ),
    DefaultBGM(
        id="shopping_bright",
        name="Shopping Bright",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Pleasant%20Porridge.mp3",
        category="lifestyle",
        description="밝은 쇼핑 BGM - 하울, 추천 영상",
    ),
    DefaultBGM(
        id="soft_corporate",
        name="Soft Corporate",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Laid%20Back%20Guitars.mp3",
        category="lifestyle",
        description="부드러운 기업형 BGM - 정보성 콘텐츠",
    ),

    # === Acoustic / Emotional ===
    DefaultBGM(
        id="soft_acoustic",
        name="Soft Acoustic",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Peaceful.mp3",
        category="acoustic",
        description="따뜻한 어쿠스틱 BGM - 요리, 일상 브이로그",
    ),
    DefaultBGM(
        id="gentle_guitar",
        name="Gentle Guitar",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Sunflower%20Slow%20Drag.mp3",
        category="acoustic",
        description="잔잔한 기타 BGM - 여행, 풍경 영상",
    ),

    # === Trendy / Electronic ===
    DefaultBGM(
        id="electronic_chill",
        name="Electronic Chill",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Digital%20Lemonade.mp3",
        category="electronic",
        description="일렉트로닉 칠아웃 - 테크, 게임 콘텐츠",
    ),
    DefaultBGM(
        id="driving_beat",
        name="Driving Beat",
        url="https://incompetech.com/music/royalty-free/mp3-royaltyfree/Cipher.mp3",
        category="electronic",
        description="드라이빙 비트 - 스포츠, 액션 영상",
    ),
]
|
||||
|
||||
|
||||
async def download_bgm_file(
    url: str,
    output_path: str,
    timeout: int = 60,
) -> Tuple[bool, str]:
    """
    Fetch a single BGM file over HTTP and write it to *output_path*.

    Args:
        url: Download URL
        output_path: Full path to save the file
        timeout: Download timeout in seconds

    Returns:
        Tuple of (success, message)
    """
    # Browser-like headers: some hosts reject bare clients.
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "audio/mpeg,audio/*;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
    }

    try:
        async with httpx.AsyncClient(follow_redirects=True, headers=request_headers) as http:
            resp = await http.get(url, timeout=timeout)

            if resp.status_code != 200:
                return False, f"HTTP {resp.status_code}"

            # Ensure the target directory exists before writing
            os.makedirs(os.path.dirname(output_path), exist_ok=True)

            async with aiofiles.open(output_path, 'wb') as out_file:
                await out_file.write(resp.content)

            return True, "Downloaded successfully"

    except httpx.TimeoutException:
        return False, "Download timeout"
    except Exception as e:
        return False, str(e)
|
||||
|
||||
|
||||
async def initialize_default_bgm(
    bgm_dir: str,
    force: bool = False,
) -> Tuple[int, int, List[str]]:
    """
    Download any missing default BGM tracks into *bgm_dir*.

    Args:
        bgm_dir: Directory to save BGM files
        force: Re-download even when a track file already exists

    Returns:
        Tuple of (downloaded_count, skipped_count, error_messages)
    """
    os.makedirs(bgm_dir, exist_ok=True)

    downloaded_count = 0
    skipped_count = 0
    failures = []

    for track in DEFAULT_BGM_TRACKS:
        destination = os.path.join(bgm_dir, f"{track.id}.mp3")

        # Existing files are kept unless the caller forces a re-download
        if not force and os.path.exists(destination):
            skipped_count += 1
            print(f"[BGM] Skipping {track.name} (already exists)")
            continue

        print(f"[BGM] Downloading {track.name}...")
        ok, detail = await download_bgm_file(track.url, destination)

        if ok:
            downloaded_count += 1
            print(f"[BGM] Downloaded {track.name}")
        else:
            failures.append(f"{track.name}: {detail}")
            print(f"[BGM] Failed to download {track.name}: {detail}")

    return downloaded_count, skipped_count, failures
|
||||
|
||||
|
||||
async def get_default_bgm_list() -> List[dict]:
    """Describe the bundled default BGM tracks.

    Returns:
        One metadata dictionary (id, name, category, description) per track.
    """
    tracks: List[dict] = []
    for track in DEFAULT_BGM_TRACKS:
        tracks.append({
            "id": track.id,
            "name": track.name,
            "category": track.category,
            "description": track.description,
        })
    return tracks
|
||||
|
||||
|
||||
def check_default_bgm_status(bgm_dir: str) -> dict:
    """Report which default BGM tracks are present in ``bgm_dir``.

    Args:
        bgm_dir: BGM directory path.

    Returns:
        Dict with track totals plus installed/missing id lists.
    """
    present: list = []
    absent: list = []

    for track in DEFAULT_BGM_TRACKS:
        # A track counts as installed when its "<id>.mp3" file exists.
        mp3_path = os.path.join(bgm_dir, f"{track.id}.mp3")
        bucket = present if os.path.exists(mp3_path) else absent
        bucket.append(track.id)

    return {
        "total": len(DEFAULT_BGM_TRACKS),
        "installed": len(present),
        "missing": len(absent),
        "installed_ids": present,
        "missing_ids": absent,
    }
|
||||
158
backend/app/services/downloader.py
Normal file
158
backend/app/services/downloader.py
Normal file
@@ -0,0 +1,158 @@
|
||||
import subprocess
|
||||
import os
|
||||
import re
|
||||
from typing import Optional, Tuple
|
||||
from app.config import settings
|
||||
|
||||
|
||||
def detect_platform(url: str) -> str:
    """Identify the hosting platform of a video URL via substring matching.

    Checks run in a fixed order (Douyin mirrors first, TikTok last);
    URLs matching none of the known hosts yield "unknown".
    """
    checks = (
        (("douyin", "iesdouyin"), "douyin"),
        (("kuaishou", "gifshow"), "kuaishou"),
        (("bilibili",), "bilibili"),
        (("youtube", "youtu.be"), "youtube"),
        (("tiktok",), "tiktok"),
    )
    for needles, platform in checks:
        if any(needle in url for needle in needles):
            return platform
    return "unknown"
|
||||
|
||||
|
||||
def sanitize_filename(filename: str) -> str:
    """Return *filename* made safe for common filesystems.

    Characters that are invalid on Windows/POSIX paths (<>:"/\\|?*) are
    replaced with underscores and the result is capped at 100 characters.
    """
    safe = re.sub(r'[<>:"/\\|?*]', '_', filename)
    return safe[:100]
|
||||
|
||||
|
||||
def get_cookies_path(platform: str) -> Optional[str]:
    """Locate a cookies file for *platform*, if one exists.

    Searches the "cookies" directory that sits next to DOWNLOAD_DIR,
    preferring a platform-specific file ("<platform>.txt") over the
    generic "cookies.txt". Returns None when neither is present.
    """
    cookies_dir = os.path.join(os.path.dirname(settings.DOWNLOAD_DIR), "cookies")

    for candidate in (f"{platform}.txt", "cookies.txt"):
        path = os.path.join(cookies_dir, candidate)
        if os.path.exists(path):
            return path

    return None
|
||||
|
||||
|
||||
async def download_video(url: str, job_id: str) -> Tuple[bool, str, Optional[str]]:
    """
    Download video using yt-dlp.

    The yt-dlp process runs in a worker thread so this coroutine does not
    block the event loop for the (up to 5 minute) download duration —
    the original implementation called subprocess.run directly, stalling
    every other task on the loop.

    Args:
        url: Source video URL (TikTok/Douyin/YouTube/...).
        job_id: Job identifier; files land in DOWNLOAD_DIR/<job_id>/.

    Returns:
        Tuple of (success, message, video_path); video_path is None on failure.
    """
    import asyncio  # local import: this module is otherwise synchronous

    output_dir = os.path.join(settings.DOWNLOAD_DIR, job_id)
    os.makedirs(output_dir, exist_ok=True)

    # Title is truncated to 50 chars by yt-dlp's template to keep paths short.
    output_template = os.path.join(output_dir, "%(title).50s.%(ext)s")

    # yt-dlp command with options for Chinese platforms
    cmd = [
        "yt-dlp",
        "--no-playlist",
        "-f", "best[ext=mp4]/best",
        "--merge-output-format", "mp4",
        "-o", output_template,
        "--no-check-certificate",
        "--socket-timeout", "30",
        "--retries", "3",
        "--user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    ]

    platform = detect_platform(url)

    # Cookies are required for Douyin/Kuaishou; prefer an explicit cookies
    # file, then fall back to the local Chrome browser profile.
    cookies_path = get_cookies_path(platform)
    if cookies_path:
        cmd.extend(["--cookies", cookies_path])
        print(f"Using cookies from: {cookies_path}")
    elif platform in ["douyin", "kuaishou", "bilibili"]:
        cmd.extend(["--cookies-from-browser", "chrome"])
        print(f"Using cookies from Chrome browser for {platform}")

    # Browser impersonation helps bypass anti-bot checks on these platforms.
    if platform in ["douyin", "kuaishou"]:
        cmd.extend([
            "--impersonate", "chrome-123:macos-14",
            "--extractor-args", "generic:impersonate",
        ])

    # Optional proxy for geo-restricted platforms.
    if settings.PROXY_URL:
        cmd.extend(["--proxy", settings.PROXY_URL])
        print(f"Using proxy: {settings.PROXY_URL}")

    cmd.append(url)

    try:
        # Off-load the blocking subprocess call to a thread.
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=300,  # 5 minute timeout
        )

        if result.returncode != 0:
            error_msg = result.stderr or result.stdout or "Unknown error"
            return False, f"Download failed: {error_msg}", None

        # Locate the downloaded file (extension depends on the source).
        for file in os.listdir(output_dir):
            if file.endswith((".mp4", ".webm", ".mkv")):
                video_path = os.path.join(output_dir, file)
                return True, "Download successful", video_path

        return False, "No video file found after download", None

    except subprocess.TimeoutExpired:
        return False, "Download timed out (5 minutes)", None
    except Exception as e:
        return False, f"Download error: {str(e)}", None
|
||||
|
||||
|
||||
def get_video_info(url: str) -> Optional[dict]:
    """Fetch video metadata via yt-dlp without downloading the media.

    Best-effort helper: returns the parsed JSON info dict, or None on
    any failure (missing yt-dlp, timeout, non-zero exit, bad JSON).
    """
    import json

    cmd = ["yt-dlp", "-j", "--no-download"]

    # Honor the configured proxy, matching download_video's behavior.
    if settings.PROXY_URL:
        cmd += ["--proxy", settings.PROXY_URL]

    cmd.append(url)

    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
        if proc.returncode == 0:
            return json.loads(proc.stdout)
    except Exception:
        # Callers treat None as "metadata unavailable".
        pass

    return None
|
||||
399
backend/app/services/thumbnail.py
Normal file
399
backend/app/services/thumbnail.py
Normal file
@@ -0,0 +1,399 @@
|
||||
"""
|
||||
Thumbnail Generator Service
|
||||
|
||||
Generates YouTube Shorts thumbnails with:
|
||||
1. Frame extraction from video
|
||||
2. GPT-generated catchphrase
|
||||
3. Text overlay with styling
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import asyncio
|
||||
from typing import Optional, Tuple, List
|
||||
from openai import OpenAI
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from app.config import settings
|
||||
from app.models.schemas import TranscriptSegment
|
||||
|
||||
|
||||
def get_openai_client() -> OpenAI:
    """Build an OpenAI client authenticated with the configured API key."""
    api_key = settings.OPENAI_API_KEY
    return OpenAI(api_key=api_key)
|
||||
|
||||
|
||||
async def extract_frame(
    video_path: str,
    output_path: str,
    timestamp: float = 2.0,
) -> Tuple[bool, str]:
    """
    Grab one frame from *video_path* at *timestamp* and save it as a JPEG.

    Args:
        video_path: Path to the source video.
        output_path: Where to write the extracted frame.
        timestamp: Position in seconds to sample.

    Returns:
        Tuple of (success, message).
    """
    try:
        args = [
            "ffmpeg", "-y",
            "-ss", str(timestamp),
            "-i", video_path,
            "-vframes", "1",
            "-q:v", "2",  # high-quality JPEG
            output_path,
        ]

        proc = await asyncio.create_subprocess_exec(
            *args,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        _, stderr = await proc.communicate()

        if proc.returncode != 0:
            return False, f"FFmpeg error: {stderr.decode()[:200]}"

        # ffmpeg can exit 0 without producing output in edge cases.
        if not os.path.exists(output_path):
            return False, "Frame extraction failed - no output file"

        return True, "Frame extracted successfully"

    except Exception as e:
        return False, f"Frame extraction error: {str(e)}"
|
||||
|
||||
|
||||
async def generate_catchphrase(
    transcript: List[TranscriptSegment],
    style: str = "homeshopping",
) -> Tuple[bool, str, str]:
    """
    Generate a catchy thumbnail text using GPT.

    Builds a style-specific Korean prompt, sends (up to the first 500
    characters of) the transcript text to the configured OpenAI chat
    model, then post-processes the reply into a short one-line phrase.

    Args:
        transcript: List of transcript segments (with translations)
        style: Style of catchphrase (homeshopping, viral, informative);
            unknown values fall back to "homeshopping"

    Returns:
        Tuple of (success, message, catchphrase); catchphrase is "" on failure
    """
    # Fail fast when no API key is configured — nothing to call.
    if not settings.OPENAI_API_KEY:
        return False, "OpenAI API key not configured", ""

    try:
        client = get_openai_client()

        # Prefer translated text when the first segment carries one;
        # otherwise fall back to the original transcription text.
        if transcript and transcript[0].translated:
            full_text = " ".join([seg.translated for seg in transcript if seg.translated])
        else:
            full_text = " ".join([seg.text for seg in transcript])

        # Per-style prompt fragments (Korean) steering the tone of the copy.
        style_guides = {
            "homeshopping": """홈쇼핑 스타일의 임팩트 있는 문구를 만드세요.
- "이거 하나면 끝!" 같은 강렬한 어필
- 혜택/효과 강조
- 숫자 활용 (예: "10초만에", "50% 절약")
- 질문형도 OK (예: "아직도 힘들게?")""",
            "viral": """바이럴 쇼츠 스타일의 호기심 유발 문구를 만드세요.
- 궁금증 유발
- 반전/놀라움 암시
- 이모지 1-2개 사용 가능""",
            "informative": """정보성 콘텐츠 스타일의 명확한 문구를 만드세요.
- 핵심 정보 전달
- 간결하고 명확하게""",
        }

        # Unknown styles silently fall back to the homeshopping guide.
        style_guide = style_guides.get(style, style_guides["homeshopping"])

        system_prompt = f"""당신은 YouTube Shorts 썸네일 문구 전문가입니다.

{style_guide}

규칙:
- 반드시 15자 이내!
- 한 줄로 작성
- 한글만 사용 (영어/한자 금지)
- 출력은 문구만! (설명 없이)

예시 출력:
이거 하나면 끝!
10초면 완성!
아직도 힘들게?
진짜 이게 돼요?"""

        response = client.chat.completions.create(
            model=settings.OPENAI_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                # Only the first 500 chars of the transcript are sent (cost control).
                {"role": "user", "content": f"다음 영상 내용으로 썸네일 문구를 만들어주세요:\n\n{full_text[:500]}"}
            ],
            temperature=0.8,
            max_tokens=50,
        )

        catchphrase = response.choices[0].message.content.strip()
        # Strip any surrounding quote characters (straight or curly) GPT may add.
        catchphrase = catchphrase.strip('"\'""''')

        # Hard length cap as a safety net (the prompt asks for <= 15 chars,
        # but GPT does not always comply).
        if len(catchphrase) > 20:
            catchphrase = catchphrase[:20]

        return True, "Catchphrase generated", catchphrase

    except Exception as e:
        return False, f"GPT error: {str(e)}", ""
|
||||
|
||||
|
||||
def add_text_overlay(
    image_path: str,
    output_path: str,
    text: str,
    font_size: int = 80,
    font_color: str = "#FFFFFF",
    stroke_color: str = "#000000",
    stroke_width: int = 4,
    position: str = "center",
    font_name: str = "NanumGothicBold",
) -> Tuple[bool, str]:
    """
    Add text overlay to image using PIL.

    The text is centered horizontally, wrapped onto two lines and/or
    shrunk (never below 40px) when it would exceed 90% of the image
    width, and drawn with a manually rendered outline: the line is
    stamped at every offset in a (2*stroke_width+1)^2 neighborhood in
    the stroke color before the final fill pass.

    Fix vs. original: the font-loading fallback used a bare ``except:``,
    which also swallowed KeyboardInterrupt/SystemExit; narrowed to
    ``except Exception``.

    Args:
        image_path: Input image path
        output_path: Output image path (saved as JPEG, quality 95)
        text: Text to overlay
        font_size: Font size in pixels
        font_color: Text color (hex)
        stroke_color: Outline color (hex)
        stroke_width: Outline thickness
        position: Text position (top, center, bottom)
        font_name: Font family name

    Returns:
        Tuple of (success, message)
    """
    try:
        img = Image.open(image_path)
        draw = ImageDraw.Draw(img)
        img_width, img_height = img.size

        # Maximum text width (90% of image width)
        max_text_width = int(img_width * 0.9)

        def load_font(size):
            # Probe well-known Linux/macOS font locations; fall back to
            # DejaVu Sans Bold if the requested family is not installed.
            font_paths = [
                f"/usr/share/fonts/truetype/nanum/{font_name}.ttf",
                f"/usr/share/fonts/opentype/nanum/{font_name}.otf",
                f"/System/Library/Fonts/{font_name}.ttf",
                f"/Library/Fonts/{font_name}.ttf",
                f"~/Library/Fonts/{font_name}.ttf",
                f"/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
            ]
            for path in font_paths:
                expanded_path = os.path.expanduser(path)
                if os.path.exists(expanded_path):
                    try:
                        return ImageFont.truetype(expanded_path, size)
                    except Exception:  # was bare except: keep best-effort but let KeyboardInterrupt through
                        continue
            return None

        font = load_font(font_size)
        if font is None:
            # PIL's built-in bitmap font is tiny; use a smaller nominal size
            # so the line-height math stays sensible.
            font = ImageFont.load_default()
            font_size = 40

        # Measure the full text to decide whether wrapping is needed.
        bbox = draw.textbbox((0, 0), text, font=font)
        text_width = bbox[2] - bbox[0]

        lines = [text]

        if text_width > max_text_width:
            # Split into two lines near the middle, preferring a break at
            # whitespace/comma within 5 chars of the midpoint.
            mid = len(text) // 2
            split_pos = mid
            for i in range(mid, max(0, mid - 5), -1):
                if text[i] in ' ,、,':
                    split_pos = i + 1
                    break
            for i in range(mid, min(len(text), mid + 5)):
                if text[i] in ' ,、,':
                    split_pos = i + 1
                    break

            line1 = text[:split_pos].strip()
            line2 = text[split_pos:].strip()
            lines = [line1, line2] if line2 else [line1]

            # Check if the 2-line version fits.
            max_line_width = max(
                draw.textbbox((0, 0), line, font=font)[2] - draw.textbbox((0, 0), line, font=font)[0]
                for line in lines
            )

            # If still too wide, shrink the font in 5px steps (floor 40px).
            while max_line_width > max_text_width and font_size > 40:
                font_size -= 5
                font = load_font(font_size)
                if font is None:
                    font = ImageFont.load_default()
                    break
                max_line_width = max(
                    draw.textbbox((0, 0), line, font=font)[2] - draw.textbbox((0, 0), line, font=font)[0]
                    for line in lines
                )

        # Vertical layout: total block height, then starting Y per position.
        line_height = font_size + 10
        total_height = line_height * len(lines)

        if position == "top":
            start_y = img_height // 6
        elif position == "bottom":
            start_y = img_height - img_height // 4 - total_height
        else:  # center
            start_y = (img_height - total_height) // 2

        # Convert hex colors to RGB tuples for PIL.
        def hex_to_rgb(hex_color):
            hex_color = hex_color.lstrip('#')
            return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))

        text_rgb = hex_to_rgb(font_color)
        stroke_rgb = hex_to_rgb(stroke_color)

        # Draw each line: outline pass first, then the fill on top.
        for i, line in enumerate(lines):
            bbox = draw.textbbox((0, 0), line, font=font)
            line_width = bbox[2] - bbox[0]
            # Account for left bearing (bbox[0]) to prevent first character
            # cut-off; some fonts/characters have non-zero left offset.
            x = (img_width - line_width) // 2 - bbox[0]
            y = start_y + i * line_height

            # Manual outline: stamp the text at every small offset.
            for dx in range(-stroke_width, stroke_width + 1):
                for dy in range(-stroke_width, stroke_width + 1):
                    if dx != 0 or dy != 0:
                        draw.text((x + dx, y + dy), line, font=font, fill=stroke_rgb)

            # Draw main text
            draw.text((x, y), line, font=font, fill=text_rgb)

        img.save(output_path, "JPEG", quality=95)

        return True, "Text overlay added"

    except Exception as e:
        return False, f"Text overlay error: {str(e)}"
|
||||
|
||||
|
||||
async def generate_thumbnail(
    job_id: str,
    video_path: str,
    transcript: List[TranscriptSegment],
    timestamp: float = 2.0,
    style: str = "homeshopping",
    custom_text: Optional[str] = None,
    font_size: int = 80,
    position: str = "center",
) -> Tuple[bool, str, Optional[str]]:
    """
    Generate a complete thumbnail with text overlay.

    Pipeline: extract a frame from the video, obtain the overlay text
    (custom text, GPT catchphrase, or fallback), draw it onto the frame,
    then delete the intermediate frame image.

    Args:
        job_id: Job ID for naming
        video_path: Path to video file
        transcript: Transcript segments
        timestamp: Time to extract frame
        style: Catchphrase style
        custom_text: Custom text (skip GPT generation)
        font_size: Font size
        position: Text position

    Returns:
        Tuple of (success, message, thumbnail_path)
    """
    frame_path = os.path.join(settings.PROCESSED_DIR, f"{job_id}_frame.jpg")
    thumbnail_path = os.path.join(settings.PROCESSED_DIR, f"{job_id}_thumbnail.jpg")

    # 1) Grab the base frame.
    ok, detail = await extract_frame(video_path, frame_path, timestamp)
    if not ok:
        return False, detail, None

    # 2) Decide on the overlay text.
    if custom_text:
        catchphrase = custom_text
    else:
        ok, detail, catchphrase = await generate_catchphrase(transcript, style)
        if not ok:
            # GPT unavailable: fall back to the first translated line,
            # or a generic Korean phrase when no translation exists.
            has_translation = bool(transcript and transcript[0].translated)
            catchphrase = transcript[0].translated if has_translation else "확인해보세요!"

    # 3) Burn the text into the frame.
    ok, detail = add_text_overlay(
        frame_path,
        thumbnail_path,
        catchphrase,
        font_size=font_size,
        position=position,
    )
    if not ok:
        return False, detail, None

    # Remove the intermediate frame; only the final thumbnail is kept.
    if os.path.exists(frame_path):
        os.remove(frame_path)

    return True, f"Thumbnail generated: {catchphrase}", thumbnail_path
|
||||
|
||||
|
||||
async def get_video_timestamps(video_path: str, count: int = 5) -> List[float]:
    """
    Get evenly distributed timestamps from video for thumbnail selection.

    Probes the video duration with ffprobe — run in a worker thread so
    the event loop is not blocked (the original called subprocess.run
    directly) — and spreads *count* timestamps across the middle 80% of
    the video. On any probe failure a static fallback list is returned,
    now trimmed to *count* entries (the original always returned 5).

    Args:
        video_path: Path to video
        count: Number of timestamps to return

    Returns:
        List of timestamps in seconds
    """
    fallback = [1.0, 3.0, 5.0, 7.0, 10.0]
    try:
        cmd = [
            "ffprobe", "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            video_path,
        ]

        # subprocess.run is blocking; off-load it to a thread.
        result = await asyncio.to_thread(
            subprocess.run, cmd, capture_output=True, text=True
        )
        duration = float(result.stdout.strip())

        # Evenly spaced, skipping the first and last 10% of the video.
        start = duration * 0.1
        end = duration * 0.9
        step = (end - start) / (count - 1) if count > 1 else 0

        return [start + i * step for i in range(count)]

    except Exception:
        # Probe failed (missing ffprobe, unreadable file, empty output):
        # fall back to fixed positions, honoring the requested count.
        return fallback[:count] if count <= len(fallback) else fallback
|
||||
421
backend/app/services/transcriber.py
Normal file
421
backend/app/services/transcriber.py
Normal file
@@ -0,0 +1,421 @@
|
||||
import whisper
|
||||
import asyncio
|
||||
import os
|
||||
from typing import List, Optional, Tuple
|
||||
from app.models.schemas import TranscriptSegment
|
||||
from app.config import settings
|
||||
|
||||
# Global model cache
|
||||
_model = None
|
||||
|
||||
|
||||
def get_whisper_model():
    """Return the shared Whisper model instance.

    The model is loaded lazily on first call and cached in the
    module-level ``_model`` so subsequent calls are free.
    """
    global _model
    if _model is not None:
        return _model
    print(f"Loading Whisper model: {settings.WHISPER_MODEL}")
    _model = whisper.load_model(settings.WHISPER_MODEL)
    return _model
|
||||
|
||||
|
||||
async def check_audio_availability(video_path: str) -> Tuple[bool, str]:
    """
    Check if video has usable audio for transcription.

    Returns:
        Tuple of (has_audio, status) where status is one of
        "no_audio_stream", "audio_silent", or "audio_ok".
    """
    # Local import avoids a circular dependency with video_processor.
    from app.services.video_processor import has_audio_stream, get_audio_volume_info, is_audio_silent

    # A missing audio stream is fatal for transcription.
    stream_present = await has_audio_stream(video_path)
    if not stream_present:
        return False, "no_audio_stream"

    # An (effectively) silent track is equally useless for Whisper.
    volume_info = await get_audio_volume_info(video_path)
    if is_audio_silent(volume_info):
        return False, "audio_silent"

    return True, "audio_ok"
|
||||
|
||||
|
||||
async def transcribe_video(
    video_path: str,
    use_noise_reduction: bool = True,
    noise_reduction_level: str = "medium",
    use_vocal_separation: bool = False,
    progress_callback: Optional[callable] = None,
) -> Tuple[bool, str, Optional[List[TranscriptSegment]], Optional[str]]:
    """
    Transcribe video audio using Whisper.

    Optionally separates vocals from background music and/or applies
    noise reduction before feeding the audio to Whisper, then splits
    the resulting segments into short, sync-friendly chunks.

    Args:
        video_path: Path to video file
        use_noise_reduction: Whether to apply noise reduction before transcription
        noise_reduction_level: "light", "medium", or "heavy"
        use_vocal_separation: Whether to separate vocals from background music first
        progress_callback: Optional async callback function(step: str, progress: int) for progress updates

    Returns:
        Tuple of (success, message, segments, detected_language)
        - success=False with message="NO_AUDIO" means video has no audio
        - success=False with message="SILENT_AUDIO" means audio is too quiet
        - success=False with message="SINGING_ONLY" means only singing detected (no speech)
    """
    # Helper to call progress callback if provided
    async def report_progress(step: str, progress: int):
        print(f"[Transcriber] report_progress: {step} ({progress}%), has_callback: {progress_callback is not None}")
        if progress_callback:
            await progress_callback(step, progress)

    if not os.path.exists(video_path):
        return False, f"Video file not found: {video_path}", None, None

    # Check audio availability before doing any heavy work.
    has_audio, audio_status = await check_audio_availability(video_path)
    if not has_audio:
        if audio_status == "no_audio_stream":
            return False, "NO_AUDIO", None, None
        elif audio_status == "audio_silent":
            return False, "SILENT_AUDIO", None, None

    audio_path = video_path  # Default to video path (Whisper can handle it)
    temp_files = []  # Track temp files for cleanup

    try:
        video_dir = os.path.dirname(video_path)

        # Step 1: Vocal separation (if enabled)
        if use_vocal_separation:
            # Local import keeps the heavy separator out of module load time.
            from app.services.audio_separator import separate_vocals, analyze_vocal_type

            await report_progress("vocal_separation", 15)
            print("Separating vocals from background music...")
            separation_dir = os.path.join(video_dir, "separated")

            success, message, vocals_path, _ = await separate_vocals(
                video_path,
                separation_dir
            )

            if success and vocals_path:
                print(f"Vocal separation complete: {vocals_path}")
                temp_files.append(separation_dir)

                # Analyze if vocals are speech or singing
                print("Analyzing vocal type (speech vs singing)...")
                vocal_type, confidence = await analyze_vocal_type(vocals_path)
                print(f"Vocal analysis: {vocal_type} (confidence: {confidence:.2f})")

                # Treat as singing if:
                # 1. Explicitly detected as singing
                # 2. Mixed with low confidence (< 0.6) - likely music, not clear speech
                if vocal_type == "singing" or (vocal_type == "mixed" and confidence < 0.6):
                    # Only singing/music detected - no clear speech to transcribe
                    _cleanup_temp_files(temp_files)
                    reason = "SINGING_ONLY" if vocal_type == "singing" else "MUSIC_DOMINANT"
                    print(f"No clear speech detected ({reason}), awaiting manual subtitle")
                    return False, "SINGING_ONLY", None, None

                # Use vocals for transcription
                audio_path = vocals_path
            else:
                # Separation failure is non-fatal; fall back to the mixed audio.
                print(f"Vocal separation failed: {message}, continuing with original audio")

        # Step 2: Apply noise reduction (if enabled and not using separated vocals)
        if use_noise_reduction and audio_path == video_path:
            from app.services.video_processor import extract_audio_with_noise_reduction

            await report_progress("extracting_audio", 20)
            cleaned_path = os.path.join(video_dir, "audio_cleaned.wav")

            await report_progress("noise_reduction", 25)
            print(f"Applying {noise_reduction_level} noise reduction...")
            success, message = await extract_audio_with_noise_reduction(
                video_path,
                cleaned_path,
                noise_reduction_level
            )

            if success:
                print(f"Noise reduction complete: {message}")
                audio_path = cleaned_path
                temp_files.append(cleaned_path)
            else:
                # Again non-fatal: Whisper can work on the original audio.
                print(f"Noise reduction failed: {message}, falling back to original audio")

        # Step 3: Transcribe with Whisper
        await report_progress("transcribing", 35)
        model = get_whisper_model()

        print(f"Transcribing audio: {audio_path}")
        # Run Whisper in thread pool to avoid blocking the event loop
        result = await asyncio.to_thread(
            model.transcribe,
            audio_path,
            task="transcribe",
            language=None,  # Auto-detect
            verbose=False,
            word_timestamps=True,
        )

        # Split long segments using word-level timestamps
        segments = _split_segments_by_words(
            result.get("segments", []),
            max_duration=2.0,  # Maximum segment duration in seconds (shorter for better sync)
            min_words=1,  # Minimum words per segment
        )

        # Clean up temp files
        _cleanup_temp_files(temp_files)

        detected_lang = result.get("language", "unknown")
        print(f"Detected language: {detected_lang}")
        # Build a human-readable summary of which preprocessing ran.
        extras = []
        if use_vocal_separation:
            extras.append("vocal separation")
        if use_noise_reduction:
            extras.append(f"noise reduction: {noise_reduction_level}")
        extra_info = f" ({', '.join(extras)})" if extras else ""

        # Return tuple with 4 elements: success, message, segments, detected_language
        return True, f"Transcription complete (detected: {detected_lang}){extra_info}", segments, detected_lang

    except Exception as e:
        _cleanup_temp_files(temp_files)
        return False, f"Transcription error: {str(e)}", None, None
|
||||
|
||||
|
||||
def _split_segments_by_words(
    raw_segments: list,
    max_duration: float = 4.0,
    min_words: int = 2,
) -> List[TranscriptSegment]:
    """
    Split long Whisper segments into shorter ones using word-level timestamps.

    Fix vs. original: if a segment's trailing word entries had empty text,
    the "last word" split never fired and the accumulated tail words were
    silently dropped; any remainder is now flushed after the word loop.

    Args:
        raw_segments: Raw segments from Whisper output
        max_duration: Maximum duration for each segment in seconds
        min_words: Minimum words per segment (to avoid single-word segments)

    Returns:
        List of TranscriptSegment with shorter durations
    """
    segments: List[TranscriptSegment] = []

    for seg in raw_segments:
        words = seg.get("words", [])
        seg_text = seg.get("text", "").strip()
        seg_start = seg.get("start", 0)
        seg_end = seg.get("end", 0)
        seg_duration = seg_end - seg_start

        # If no word timestamps or segment is short enough, use as-is.
        if not words or seg_duration <= max_duration:
            segments.append(TranscriptSegment(
                start=seg_start,
                end=seg_end,
                text=seg_text,
            ))
            continue

        # Split segment using word timestamps.
        current_words = []
        current_start = None
        last_end = seg_end  # end time of the most recent accepted word

        for i, word in enumerate(words):
            word_start = word.get("start", seg_start)
            word_end = word.get("end", seg_end)
            word_text = word.get("word", "").strip()

            # Whisper occasionally emits empty word tokens; skip them.
            if not word_text:
                continue

            # Start a new sub-segment at the first accepted word.
            if current_start is None:
                current_start = word_start

            current_words.append(word_text)
            last_end = word_end
            current_duration = word_end - current_start

            # Decide whether to close the current sub-segment here.
            is_last_word = (i == len(words) - 1)
            should_split = False

            if is_last_word:
                should_split = True
            elif current_duration >= max_duration and len(current_words) >= min_words:
                should_split = True
            elif current_duration >= max_duration * 0.5:
                # Split at natural break points (punctuation) more aggressively.
                if word_text.endswith((',', '.', '!', '?', '。', ',', '!', '?', '、', ';', ';')):
                    should_split = True
            elif current_duration >= 1.0 and word_text.endswith(('。', '!', '?', '.', '!', '?')):
                # Always split at sentence endings with >= 1s of content.
                should_split = True

            if should_split and current_words:
                text = " ".join(current_words)
                # For Chinese/Japanese, remove spaces between words.
                if any('\u4e00' <= c <= '\u9fff' for c in text):
                    text = text.replace(" ", "")

                segments.append(TranscriptSegment(
                    start=current_start,
                    end=word_end,
                    text=text,
                ))

                # Reset for next sub-segment.
                current_words = []
                current_start = None

        # BUG FIX: flush any words left over when the trailing entries in
        # `words` were empty-text tokens (the last-word split never fired).
        if current_words:
            text = " ".join(current_words)
            if any('\u4e00' <= c <= '\u9fff' for c in text):
                text = text.replace(" ", "")
            segments.append(TranscriptSegment(
                start=current_start,
                end=last_end,
                text=text,
            ))

    return segments
|
||||
|
||||
|
||||
def _cleanup_temp_files(paths: list):
|
||||
"""Clean up temporary files and directories."""
|
||||
import shutil
|
||||
for path in paths:
|
||||
try:
|
||||
if os.path.isdir(path):
|
||||
shutil.rmtree(path, ignore_errors=True)
|
||||
elif os.path.exists(path):
|
||||
os.remove(path)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def segments_to_srt(segments: List[TranscriptSegment], use_translated: bool = True) -> str:
    """Render segments as an SRT subtitle document.

    When *use_translated* is True a segment's translation is preferred,
    falling back to the original text when the translation is empty.
    """
    blocks = []
    for index, seg in enumerate(segments, 1):
        if use_translated and seg.translated:
            caption = seg.translated
        else:
            caption = seg.text
        blocks.extend([
            str(index),
            f"{format_srt_time(seg.start)} --> {format_srt_time(seg.end)}",
            caption,
            "",  # blank separator line between SRT cues
        ])
    return "\n".join(blocks)
|
||||
|
||||
|
||||
def format_srt_time(seconds: float) -> str:
    """Format *seconds* as an SRT timestamp (HH:MM:SS,mmm)."""
    whole = int(seconds)
    millis = int((seconds % 1) * 1000)
    hours = whole // 3600
    minutes, secs = divmod(whole % 3600, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
|
||||
|
||||
|
||||
def segments_to_ass(
    segments: List[TranscriptSegment],
    use_translated: bool = True,
    font_size: int = 28,
    font_color: str = "FFFFFF",
    outline_color: str = "000000",
    font_name: str = "NanumGothic",
    position: str = "bottom",  # top, center, bottom
    outline_width: int = 3,
    bold: bool = True,
    shadow: int = 1,
    background_box: bool = True,
    background_opacity: str = "E0",  # 00=transparent, FF=opaque
    animation: str = "none",  # none, fade, pop
    time_offset: float = 0.0,  # Delay all subtitles by this amount (for intro text)
) -> str:
    """
    Convert segments to ASS format with styling.

    Args:
        segments: List of transcript segments
        use_translated: Use translated text if available
        font_size: Font size in pixels
        font_color: Font color in hex (without #)
        outline_color: Outline color in hex (without #)
        font_name: Font family name
        position: Subtitle position - "top", "center", or "bottom"
        outline_width: Outline thickness
        bold: Use bold text
        shadow: Shadow depth (0-4)
        background_box: Show semi-transparent background box
        background_opacity: Background box alpha as a two-digit hex string
            ("00" transparent .. "FF" opaque); only used when background_box
        animation: Animation type - "none", "fade", or "pop"
        time_offset: Delay all subtitle timings by this amount in seconds (useful when intro text is shown)

    Returns:
        ASS formatted subtitle string
    """
    # ASS Alignment values:
    # 1=Bottom-Left, 2=Bottom-Center, 3=Bottom-Right
    # 4=Middle-Left, 5=Middle-Center, 6=Middle-Right
    # 7=Top-Left, 8=Top-Center, 9=Top-Right
    alignment_map = {
        "top": 8,  # Top-Center
        "center": 5,  # Middle-Center (middle of the frame)
        "bottom": 2,  # Bottom-Center (default)
    }
    alignment = alignment_map.get(position, 2)

    # Adjust margin based on position (lower value = closer to the screen edge).
    # Bottom margin is kept small so the new subtitles cover any burned-in
    # subtitles in the original video.
    margin_v = 30 if position == "bottom" else (100 if position == "top" else 10)

    # Bold: -1 = bold, 0 = normal (ASS convention)
    bold_value = -1 if bold else 0

    # BorderStyle: 1 = outline + shadow, 3 = opaque box (background)
    border_style = 3 if background_box else 1

    # BackColour alpha: use provided opacity or default
    back_alpha = background_opacity if background_box else "80"

    # ASS header — PlayResX/Y fixed at 1080x1920 (vertical Shorts canvas)
    ass_content = f"""[Script Info]
Title: Shorts Maker Subtitle
ScriptType: v4.00+
PlayDepth: 0
PlayResX: 1080
PlayResY: 1920

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,{font_name},{font_size},&H00{font_color},&H00FFFFFF,&H00{outline_color},&H{back_alpha}000000,{bold_value},0,0,0,100,100,0,0,{border_style},{outline_width},{shadow},{alignment},30,30,{margin_v},1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

    for seg in segments:
        # Apply time offset (for intro text overlay)
        start_time = format_ass_time(seg.start + time_offset)
        end_time = format_ass_time(seg.end + time_offset)
        text = seg.translated if use_translated and seg.translated else seg.text
        # Escape special characters: backslash first, then braces, so the
        # backslashes introduced for the braces are not double-escaped.
        text = text.replace("\\", "\\\\").replace("{", "\\{").replace("}", "\\}")

        # Add animation effects as inline ASS override tags
        if animation == "fade":
            # Fade in/out effect (250ms)
            text = f"{{\\fad(250,250)}}{text}"
        elif animation == "pop":
            # Pop-in effect with scale animation
            text = f"{{\\t(0,150,\\fscx110\\fscy110)\\t(150,300,\\fscx100\\fscy100)}}{text}"

        ass_content += f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{text}\n"

    return ass_content
|
||||
|
||||
|
||||
def format_ass_time(seconds: float) -> str:
    """Format seconds as an ASS timestamp (H:MM:SS.cc).

    Uses integer-centisecond arithmetic so float representation error
    cannot drop a centisecond and so values like 59.999 correctly carry
    into the next second instead of formatting as "0:00:59.99" with
    truncation. Negative inputs clamp to zero.

    Args:
        seconds: Non-negative time offset in seconds.

    Returns:
        "H:MM:SS.cc" string (hours not zero-padded, per ASS convention).
    """
    total_cs = max(0, round(seconds * 100))
    hours, rem = divmod(total_cs, 360_000)
    minutes, rem = divmod(rem, 6_000)
    secs, centis = divmod(rem, 100)
    return f"{hours}:{minutes:02d}:{secs:02d}.{centis:02d}"
|
||||
468
backend/app/services/translator.py
Normal file
468
backend/app/services/translator.py
Normal file
@@ -0,0 +1,468 @@
|
||||
import re
|
||||
from typing import List, Tuple, Optional
|
||||
from openai import OpenAI
|
||||
from app.models.schemas import TranscriptSegment
|
||||
from app.config import settings
|
||||
|
||||
|
||||
def get_openai_client() -> OpenAI:
    """Build an OpenAI API client configured from application settings."""
    client = OpenAI(api_key=settings.OPENAI_API_KEY)
    return client
|
||||
|
||||
|
||||
class TranslationMode:
    """String constants selecting how transcripts are translated.

    Passed as the ``mode`` argument of ``translate_segments``; the
    configured default is the string in ``settings.TRANSLATION_MODE``.
    """
    DIRECT = "direct"  # direct translation, preserving the original structure
    SUMMARIZE = "summarize"  # summarize first, then translate
    REWRITE = "rewrite"  # summarize + rewrite as a fresh Korean script
|
||||
|
||||
|
||||
async def shorten_text(client: OpenAI, text: str, max_chars: int) -> str:
    """
    Shorten a Korean text to fit within character limit.

    Asks the configured chat model to compress the caption, then strips
    quotes and trailing "(N자)" length annotations the model tends to add.
    Falls back to plain truncation on any API failure, so this never raises.

    Args:
        client: OpenAI client
        text: Text to shorten
        max_chars: Maximum character count

    Returns:
        Shortened text (hard-truncated to max_chars if the model overshoots)
    """
    try:
        response = client.chat.completions.create(
            model=settings.OPENAI_MODEL,
            messages=[
                {
                    "role": "system",
                    "content": f"""한국어 자막을 {max_chars}자 이내로 줄이세요.

규칙:
- 반드시 {max_chars}자 이하!
- 핵심 의미만 유지
- 자연스러운 한국어
- 존댓말 유지
- 출력은 줄인 문장만!

예시:
입력: "요리할 때마다 한 시간이 걸리셨죠?" (18자)
제한: 10자
출력: "시간 오래 걸리죠" (8자)

입력: "채소 다듬는 데만 30분 걸리셨죠" (16자)
제한: 10자
출력: "채소만 30분" (6자)"""
                },
                {
                    "role": "user",
                    "content": f"입력: \"{text}\" ({len(text)}자)\n제한: {max_chars}자\n출력:"
                }
            ],
            temperature=0.3,
            max_tokens=50,
        )

        shortened = response.choices[0].message.content.strip()
        # Remove quotes, parentheses, and extra characters
        # (straight and typographic/curly quote variants)
        shortened = shortened.strip('"\'""''')
        # Remove any trailing parenthetical notes like "(10자)"
        shortened = re.sub(r'\s*\([^)]*자\)\s*$', '', shortened)
        shortened = re.sub(r'\s*\(\d+자\)\s*$', '', shortened)
        # Remove any remaining quotes (straight, left-curly, right-curly)
        shortened = shortened.replace('"', '').replace('"', '').replace('"', '')
        shortened = shortened.replace("'", '').replace("'", '').replace("'", '')
        shortened = shortened.strip()

        # If still too long, truncate cleanly
        if len(shortened) > max_chars:
            shortened = shortened[:max_chars]

        return shortened

    except Exception as e:
        # Fallback: simple truncation with an ellipsis, so callers always
        # get usable text even when the API is unreachable.
        if len(text) > max_chars:
            return text[:max_chars-1] + "…"
        return text
|
||||
|
||||
|
||||
async def translate_segments(
    segments: List[TranscriptSegment],
    target_language: str = "Korean",
    mode: str = TranslationMode.DIRECT,
    max_tokens: Optional[int] = None,
) -> Tuple[bool, str, List[TranscriptSegment]]:
    """
    Translate transcript segments to target language using OpenAI.

    Segments are MUTATED in place (``seg.translated`` is set) and the same
    list is also returned.

    NOTE(review): ``target_language`` is currently unused — every prompt
    below hard-codes Korean output; confirm whether the parameter should
    be wired in or removed.

    Args:
        segments: List of transcript segments
        target_language: Target language for translation
        mode: Translation mode (direct, summarize, rewrite)
        max_tokens: Maximum output tokens (for cost control)

    Returns:
        Tuple of (success, message, translated_segments)
    """
    if not settings.OPENAI_API_KEY:
        return False, "OpenAI API key not configured", segments

    try:
        client = get_openai_client()

        # Batch translate for efficiency: one API call for all segments,
        # joined with a '---' delimiter (overwritten below for REWRITE mode).
        texts = [seg.text for seg in segments]
        combined_text = "\n---\n".join(texts)

        # Calculate video duration for context
        # NOTE(review): total_duration is never referenced afterwards —
        # candidate for removal.
        total_duration = segments[-1].end if segments else 0

        # Calculate segment info for length guidance
        segment_info = []
        for i, seg in enumerate(segments):
            duration = seg.end - seg.start
            max_chars = int(duration * 5)  # ~5 Korean chars per second (stricter for better sync)
            segment_info.append(f"[{i+1}] {duration:.1f}초 = 최대 {max_chars}자 (엄수!)")

        # Get custom prompt settings from config (fall back to defaults
        # when the env vars are empty strings)
        gpt_role = settings.GPT_ROLE or "친근한 유튜브 쇼츠 자막 작가"
        gpt_tone = settings.GPT_TONE or "존댓말"
        gpt_style = settings.GPT_STYLE or ""

        # Tone examples shown to the model for each speech register
        tone_examples = {
            "존댓말": '~해요, ~이에요, ~하죠',
            "반말": '~해, ~야, ~지',
            "격식체": '~합니다, ~입니다',
        }
        tone_example = tone_examples.get(gpt_tone, tone_examples["존댓말"])

        # Additional style instruction (appended as rule #6 when configured)
        style_instruction = f"\n6. Style: {gpt_style}" if gpt_style else ""

        # Select prompt based on mode
        if mode == TranslationMode.REWRITE:
            # Build indexed timeline input with Chinese text.
            # Use segment numbers to handle duplicate timestamps.
            timeline_input = []
            for i, seg in enumerate(segments):
                mins = int(seg.start // 60)
                secs = int(seg.start % 60)
                timeline_input.append(f"[{i+1}] {mins}:{secs:02d} {seg.text}")

            system_prompt = f"""당신은 생활용품 유튜브 쇼츠 자막 작가입니다.

중국어 원문의 "의미"만 참고하여, 한국인이 직접 말하는 것처럼 자연스러운 자막을 작성하세요.

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
🎯 핵심 원칙: 번역이 아니라 "재창작"
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

✅ 필수 규칙:
1. 한 문장 = 한 가지 정보 (두 개 이상 금지)
2. 중복 표현 절대 금지 ("편해요"가 이미 나왔으면 다시 안 씀)
3. {gpt_tone} 사용 ({tone_example})
4. 세그먼트 수 유지: 입력 {len(segments)}개 → 출력 {len(segments)}개
5. 중국어 한자 금지, 순수 한글만

❌ 금지 표현 (번역투):
- "~할 수 있어요" → "~돼요", "~됩니다"
- "매우/아주/정말" 남용 → 꼭 필요할 때만
- "그것은/이것은" → "이거", "이건"
- "~하는 것이" → 직접 표현으로
- "편리해요/편해요" 반복 → 한 번만, 이후 다른 표현
- "좋아요/좋고요" 반복 → 구체적 장점으로 대체

🎵 쇼츠 리듬감:
- 짧게 끊어서
- 한 호흡에 하나씩
- 시청자가 따라 읽을 수 있게

📝 좋은 예시:

원문: "이 작은 박스 디자인이 참 좋네요. 평소에 씨앗 먹을 때 간편하게 먹을 수 있어요."
❌ 나쁜 번역: "이 작은 박스 디자인이 참 좋네요. 평소에 씨앗 먹을 때 간편하게 먹을 수 있어요."
✅ 좋은 재창작: "이 작은 박스, 생각보다 정말 잘 만들었어요."

원문: "테이블에 두거나 손에 들고 사용하기에도 좋고요. 침대에 누워서나 사무실에서도 간식이나 과일 먹기 정말 편해요."
❌ 나쁜 번역: "테이블에 두거나 손에 들고 사용하기에도 좋고요. 침대에 누워서나 사무실에서도 간식이나 과일 먹기 정말 편해요."
✅ 좋은 재창작 (2개로 분리):
- "테이블 위에서도, 침대에서도, 사무실에서도 사용하기 좋고"
- "과일 씻고 물기 빼는 데도 활용 가능합니다."

원문: "가정에서 필수 아이템이에요. 정말 유용하죠. 꼭 하나씩 가져야 할 제품이에요."
❌ 나쁜 번역: 그대로 3문장
✅ 좋은 재창작: "집에 하나 있으면 은근히 자주 쓰게 됩니다."{style_instruction}

출력 형식:
[번호] 시간 자막 내용

⚠️ 입력과 동일한 세그먼트 수({len(segments)}개)를 출력하세요!
⚠️ 각 [번호]는 입력과 1:1 대응해야 합니다!"""

            # Use indexed timeline format for user content
            combined_text = "[중국어 원문]\n\n" + "\n".join(timeline_input)

        elif mode == TranslationMode.SUMMARIZE:
            system_prompt = f"""You are: {gpt_role}

Task: Translate Chinese to SHORT Korean subtitles.

Length limits (자막 싱크!):
{chr(10).join(segment_info)}

Rules:
1. Use {gpt_tone} ({tone_example})
2. Summarize to core meaning - be BRIEF
3. Max one short sentence per segment
4. {len(segments)} segments separated by '---'{style_instruction}"""

        else:  # DIRECT mode
            system_prompt = f"""You are: {gpt_role}

Task: Translate Chinese to Korean subtitles.

Length limits (자막 싱크!):
{chr(10).join(segment_info)}

Rules:
1. Use {gpt_tone} ({tone_example})
2. Keep translations SHORT and readable
3. {len(segments)} segments separated by '---'{style_instruction}"""

        # Build API request
        request_params = {
            "model": settings.OPENAI_MODEL,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": combined_text}
            ],
            # REWRITE is creative re-authoring, so it gets a higher temperature
            "temperature": 0.65 if mode == TranslationMode.REWRITE else 0.3,
        }

        # Add max_tokens if specified (for cost control)
        effective_max_tokens = max_tokens or settings.TRANSLATION_MAX_TOKENS
        if effective_max_tokens:
            # Use higher token limit for REWRITE mode
            if mode == TranslationMode.REWRITE:
                request_params["max_tokens"] = max(effective_max_tokens, 700)
            else:
                request_params["max_tokens"] = effective_max_tokens

        response = client.chat.completions.create(**request_params)

        translated_text = response.choices[0].message.content

        # Parse based on mode
        if mode == TranslationMode.REWRITE:
            # Parse indexed timeline format: "[1] 0:00 자막\n[2] 0:02 자막\n..."
            indexed_pattern = re.compile(r'^\[(\d+)\]\s*\d+:\d{2}\s+(.+)$', re.MULTILINE)
            matches = indexed_pattern.findall(translated_text)

            # Create mapping from segment index to translation
            translations_by_index = {}
            for idx, text in matches:
                translations_by_index[int(idx)] = text.strip()

            # Map translations back to segments by index (1-based)
            for i, seg in enumerate(segments):
                seg_num = i + 1  # 1-based index
                if seg_num in translations_by_index:
                    seg.translated = translations_by_index[seg_num]
                else:
                    # No matching translation found - try fallback to old timestamp-based parsing
                    seg.translated = ""

            # Fallback: if no indexed matches, try old timestamp format
            if not matches:
                print("[Warning] No indexed format found, falling back to timestamp parsing")
                timeline_pattern = re.compile(r'^(\d+):(\d{2})\s+(.+)$', re.MULTILINE)
                timestamp_matches = timeline_pattern.findall(translated_text)

                # Create mapping from timestamp to translation
                translations_by_time = {}
                for mins, secs, text in timestamp_matches:
                    time_sec = int(mins) * 60 + int(secs)
                    translations_by_time[time_sec] = text.strip()

                # Track used translations to prevent duplicates
                used_translations = set()

                # Map translations back to segments by matching start times
                for seg in segments:
                    start_sec = int(seg.start)
                    matched_time = None

                    # Try exact match first
                    if start_sec in translations_by_time and start_sec not in used_translations:
                        matched_time = start_sec
                    else:
                        # Try to find closest UNUSED match within 1 second
                        for t in range(start_sec - 1, start_sec + 2):
                            if t in translations_by_time and t not in used_translations:
                                matched_time = t
                                break

                    if matched_time is not None:
                        seg.translated = translations_by_time[matched_time]
                        used_translations.add(matched_time)
                    else:
                        seg.translated = ""
        else:
            # Original parsing for other modes: '---'-delimited chunks,
            # matched to segments positionally
            translated_parts = translated_text.split("---")
            for i, seg in enumerate(segments):
                if i < len(translated_parts):
                    seg.translated = translated_parts[i].strip()
                else:
                    seg.translated = seg.text  # Fallback to original

        # Calculate token usage for logging
        usage = response.usage
        token_info = f"(tokens: {usage.prompt_tokens}+{usage.completion_tokens}={usage.total_tokens})"

        # Post-processing: Shorten segments that exceed character limit.
        # Skip for REWRITE mode - the prompt handles length naturally.
        shortened_count = 0
        if mode != TranslationMode.REWRITE:
            chars_per_sec = 5  # must match the ~5 chars/sec budget used above
            for i, seg in enumerate(segments):
                if seg.translated:
                    duration = seg.end - seg.start
                    max_chars = int(duration * chars_per_sec)
                    current_len = len(seg.translated)

                    # 30% tolerance before paying for a shortening API call
                    if current_len > max_chars * 1.3 and max_chars >= 5:
                        seg.translated = await shorten_text(client, seg.translated, max_chars)
                        shortened_count += 1
                        print(f"[Shorten] Seg {i+1}: {current_len}→{len(seg.translated)}자 (제한:{max_chars}자)")

        shorten_info = f" [축약:{shortened_count}개]" if shortened_count > 0 else ""

        return True, f"Translation complete [{mode}] {token_info}{shorten_info}", segments

    except Exception as e:
        return False, f"Translation error: {str(e)}", segments
|
||||
|
||||
|
||||
async def generate_shorts_script(
    segments: List[TranscriptSegment],
    style: str = "engaging",
    max_tokens: int = 500,
) -> Tuple[bool, str, Optional[str]]:
    """
    Generate a completely new Korean Shorts script from Chinese transcript.

    Unlike translate_segments, this ignores per-segment timing and asks the
    model to author an original timestamped script from the full text.

    Args:
        segments: Original transcript segments
        style: Script style (engaging, informative, funny, dramatic)
        max_tokens: Maximum output tokens

    Returns:
        Tuple of (success, message, script) — script is the raw model
        output ("[m:ss] line" per row), or None on failure
    """
    if not settings.OPENAI_API_KEY:
        return False, "OpenAI API key not configured", None

    try:
        client = get_openai_client()

        # Combine all text into one prompt payload
        full_text = " ".join([seg.text for seg in segments])
        total_duration = segments[-1].end if segments else 0

        # Per-style writing guidance injected into the system prompt
        style_guides = {
            "engaging": "Use hooks, questions, and emotional expressions. Start with attention-grabbing line.",
            "informative": "Focus on facts and clear explanations. Use simple, direct language.",
            "funny": "Add humor, wordplay, and light-hearted tone. Include relatable jokes.",
            "dramatic": "Build tension and suspense. Use impactful short sentences.",
        }

        # Unknown styles fall back to "engaging"
        style_guide = style_guides.get(style, style_guides["engaging"])

        system_prompt = f"""You are a viral Korean YouTube Shorts script writer.

Create a COMPLETELY ORIGINAL Korean script inspired by the Chinese video content.

=== CRITICAL: ANTI-PLAGIARISM RULES ===
- This is NOT translation - it's ORIGINAL CONTENT CREATION
- NEVER copy sentence structures, word order, or phrasing from original
- Extract only the CORE IDEA, then write YOUR OWN script from scratch
- Imagine you're a Korean creator who just learned this interesting fact
- Add your own personality, reactions, and Korean cultural context
=======================================

Video duration: ~{int(total_duration)} seconds
Style: {style}
Guide: {style_guide}

Output format:
[0:00] 첫 번째 대사
[0:03] 두 번째 대사
...

Requirements:
- Write in POLITE FORMAL KOREAN (존댓말/경어) - friendly but respectful
- Each line: 2-3 seconds when spoken aloud
- Start with a HOOK that grabs attention
- Use polite Korean expressions: "이거 아세요?", "정말 신기하죠", "근데 여기서 중요한 건요"
- End with engagement: question, call-to-action, or surprise
- Make it feel like ORIGINAL Korean content, not a translation"""

        response = client.chat.completions.create(
            model=settings.OPENAI_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Chinese transcript:\n{full_text}"}
            ],
            temperature=0.7,
            max_tokens=max_tokens,
        )

        script = response.choices[0].message.content
        usage = response.usage
        token_info = f"(tokens: {usage.total_tokens})"

        return True, f"Script generated [{style}] {token_info}", script

    except Exception as e:
        return False, f"Script generation error: {str(e)}", None
|
||||
|
||||
|
||||
async def translate_single(
    text: str,
    target_language: str = "Korean",
    max_tokens: Optional[int] = None,
) -> Tuple[bool, str]:
    """Translate one piece of text via the OpenAI chat API.

    Args:
        text: Source text to translate.
        target_language: Language the model is instructed to output.
        max_tokens: Optional output-token cap for cost control.

    Returns:
        (success, translated_text). On a missing API key or any API
        failure, the original text is returned unchanged with False.
    """
    if not settings.OPENAI_API_KEY:
        return False, text

    try:
        messages = [
            {
                "role": "system",
                "content": f"Translate to {target_language}. Only output the translation, nothing else."
            },
            {
                "role": "user",
                "content": text
            }
        ]

        params = {
            "model": settings.OPENAI_MODEL,
            "messages": messages,
            "temperature": 0.3,
        }
        if max_tokens:
            params["max_tokens"] = max_tokens

        reply = get_openai_client().chat.completions.create(**params)
        return True, reply.choices[0].message.content.strip()

    except Exception:
        # Degrade gracefully to the untranslated input on any API error.
        return False, text
|
||||
659
backend/app/services/video_processor.py
Normal file
659
backend/app/services/video_processor.py
Normal file
@@ -0,0 +1,659 @@
|
||||
import subprocess
|
||||
import asyncio
|
||||
import os
|
||||
from typing import Optional, Tuple
|
||||
from app.config import settings
|
||||
|
||||
|
||||
async def process_video(
    input_path: str,
    output_path: str,
    subtitle_path: Optional[str] = None,
    bgm_path: Optional[str] = None,
    bgm_volume: float = 0.3,
    keep_original_audio: bool = False,
    intro_text: Optional[str] = None,
    intro_duration: float = 0.7,
    intro_font_size: int = 100,
) -> Tuple[bool, str]:
    """
    Process video: remove audio, add subtitles, add BGM, add intro text.

    Builds a single ffmpeg invocation combining subtitle burn-in (ASS),
    optional looping background music, and an optional fading intro
    drawtext overlay, then runs it in a worker thread.

    Args:
        input_path: Path to input video
        output_path: Path for output video
        subtitle_path: Path to ASS/SRT subtitle file
        bgm_path: Path to BGM audio file
        bgm_volume: Volume level for BGM (0.0 - 1.0)
        keep_original_audio: Whether to keep original audio
        intro_text: Text to display at the beginning of video (YouTube Shorts thumbnail)
        intro_duration: How long to display intro text (seconds)
        intro_font_size: Font size for intro text (100-120 recommended)

    Returns:
        Tuple of (success, message)
    """
    if not os.path.exists(input_path):
        return False, f"Input video not found: {input_path}"

    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Build FFmpeg command
    cmd = ["ffmpeg", "-y"]  # -y to overwrite

    # Input video
    cmd.extend(["-i", input_path])

    # Input BGM if provided (stream_loop must come BEFORE -i)
    if bgm_path and os.path.exists(bgm_path):
        cmd.extend(["-stream_loop", "-1"])  # Loop BGM infinitely
        cmd.extend(["-i", bgm_path])

    # Build filter complex
    filter_parts = []
    audio_parts = []  # NOTE(review): never used below; candidate for removal

    # Audio handling: decide which audio stream (if any) reaches the output
    if keep_original_audio and bgm_path and os.path.exists(bgm_path):
        # Mix original audio with BGM
        filter_parts.append(f"[0:a]volume=1.0[original]")
        filter_parts.append(f"[1:a]volume={bgm_volume}[bgm]")
        filter_parts.append(f"[original][bgm]amix=inputs=2:duration=shortest[audio]")
        audio_output = "[audio]"
    elif bgm_path and os.path.exists(bgm_path):
        # BGM only (no original audio)
        filter_parts.append(f"[1:a]volume={bgm_volume}[audio]")
        audio_output = "[audio]"
    elif keep_original_audio:
        # Original audio only
        audio_output = "0:a"
    else:
        # No audio
        audio_output = None

    # Build video filter chain
    video_filters = []

    # Note: We no longer use tpad to add frozen frames, as it extends the video duration.
    # Instead, intro text is simply overlaid on the existing video content.

    # 2. Add subtitle overlay if provided
    if subtitle_path and os.path.exists(subtitle_path):
        # Escape the path for ffmpeg's filter-argument syntax
        escaped_path = subtitle_path.replace("\\", "/").replace(":", "\\:").replace("'", "\\'")
        video_filters.append(f"ass='{escaped_path}'")

    # 3. Add intro text overlay if provided (shown during frozen frame portion)
    if intro_text:
        # Find a suitable font - try common Korean fonts
        font_options = [
            "/System/Library/Fonts/Supplemental/AppleGothic.ttf",  # macOS Korean
            "/System/Library/Fonts/AppleSDGothicNeo.ttc",  # macOS Korean
            "/usr/share/fonts/truetype/nanum/NanumGothicBold.ttf",  # Linux Korean
            "/usr/share/fonts/opentype/noto/NotoSansCJK-Bold.ttc",  # Linux CJK
        ]

        font_file = None
        for font in font_options:
            if os.path.exists(font):
                font_file = font.replace(":", "\\:")
                break

        # Adjust font size and split text if too long
        # Shorts video is 1080 width, so ~10-12 chars fit comfortably at 100px
        text_len = len(intro_text)
        adjusted_font_size = intro_font_size

        # Split into 2 lines if text is long (more than 10 chars)
        lines = []
        if text_len > 10:
            # Find best split point near middle (prefer a space/comma,
            # scanning backwards first, then forwards)
            mid = text_len // 2
            split_pos = mid
            for i in range(mid, max(0, mid - 5), -1):
                if intro_text[i] in ' ,、,':
                    split_pos = i + 1
                    break
            for i in range(mid, min(text_len, mid + 5)):
                if intro_text[i] in ' ,、,':
                    split_pos = i + 1
                    break

            line1 = intro_text[:split_pos].strip()
            line2 = intro_text[split_pos:].strip()
            if line2:
                lines = [line1, line2]
            else:
                lines = [intro_text]
        else:
            lines = [intro_text]

        # Adjust font size based on longest line length
        max_line_len = max(len(line) for line in lines)
        if max_line_len > 12:
            adjusted_font_size = int(intro_font_size * 10 / max_line_len)
            adjusted_font_size = max(50, min(adjusted_font_size, intro_font_size))  # Clamp between 50-100

        # Add fade effect timing: fade out over the final 300ms
        fade_out_start = max(0.1, intro_duration - 0.3)
        alpha_expr = f"if(gt(t,{fade_out_start}),(({intro_duration}-t)/0.3),1)"

        # Create drawtext filter(s) for each line
        line_height = adjusted_font_size + 20
        total_height = line_height * len(lines)

        for i, line in enumerate(lines):
            # NOTE(review): backslash is escaped LAST here, which re-escapes
            # the backslashes just inserted for quotes/colons — the ass-path
            # escaping above does it in the opposite order. Confirm against
            # ffmpeg drawtext quoting rules.
            escaped_text = line.replace("'", "\\'").replace(":", "\\:").replace("\\", "\\\\")

            # Calculate y position for this line (centered overall)
            if len(lines) == 1:
                y_expr = "(h-text_h)/2"
            else:
                # Center the block of lines, then position each line
                y_offset = int((i - (len(lines) - 1) / 2) * line_height)
                y_expr = f"(h-text_h)/2+{y_offset}"

            drawtext_parts = [
                f"text='{escaped_text}'",
                f"fontsize={adjusted_font_size}",
                "fontcolor=white",
                "x=(w-text_w)/2",  # Center horizontally
                f"y={y_expr}",
                f"enable='lt(t,{intro_duration})'",
                "borderw=3",
                "bordercolor=black",
                "box=1",
                "boxcolor=black@0.6",
                "boxborderw=15",
                f"alpha='{alpha_expr}'",
            ]

            if font_file:
                drawtext_parts.insert(1, f"fontfile='{font_file}'")

            video_filters.append(f"drawtext={':'.join(drawtext_parts)}")

    # Combine video filters
    video_filter_str = ",".join(video_filters) if video_filters else None

    # Construct FFmpeg command: choose between -filter_complex (audio graph
    # present) and plain -vf depending on which filters were built
    if filter_parts or video_filter_str:
        if filter_parts and video_filter_str:
            full_filter = ";".join(filter_parts) + f";[0:v]{video_filter_str}[vout]"
            cmd.extend(["-filter_complex", full_filter])
            cmd.extend(["-map", "[vout]"])
            if audio_output and audio_output.startswith("["):
                cmd.extend(["-map", audio_output])
            elif audio_output:
                cmd.extend(["-map", audio_output])
        elif video_filter_str:
            cmd.extend(["-vf", video_filter_str])
            if bgm_path and os.path.exists(bgm_path):
                cmd.extend(["-filter_complex", f"[1:a]volume={bgm_volume}[audio]"])
                cmd.extend(["-map", "0:v", "-map", "[audio]"])
            elif not keep_original_audio:
                cmd.extend(["-an"])  # No audio
        elif filter_parts:
            cmd.extend(["-filter_complex", ";".join(filter_parts)])
            cmd.extend(["-map", "0:v"])
            if audio_output and audio_output.startswith("["):
                cmd.extend(["-map", audio_output])
    else:
        if not keep_original_audio:
            cmd.extend(["-an"])

    # Output settings
    cmd.extend([
        "-c:v", "libx264",
        "-preset", "medium",
        "-crf", "23",
        "-c:a", "aac",
        "-b:a", "128k",
        "-shortest",
        output_path
    ])

    try:
        # Run FFmpeg in thread pool to avoid blocking the event loop
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=600,  # 10 minute timeout
        )

        if result.returncode != 0:
            # Surface only the tail of stderr — ffmpeg logs are long
            error_msg = result.stderr[-500:] if result.stderr else "Unknown error"
            return False, f"FFmpeg error: {error_msg}"

        if os.path.exists(output_path):
            return True, "Video processing complete"
        else:
            return False, "Output file not created"

    except subprocess.TimeoutExpired:
        return False, "Processing timed out"
    except Exception as e:
        return False, f"Processing error: {str(e)}"
|
||||
|
||||
|
||||
async def get_video_duration(video_path: str) -> Optional[float]:
    """Get video duration in seconds via ffprobe.

    Args:
        video_path: Path to the media file.

    Returns:
        Duration in seconds, or None when ffprobe is missing, fails,
        times out, or emits unparseable output.
    """
    cmd = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        video_path
    ]

    try:
        # Run in a worker thread so the blocking subprocess call does not
        # stall the event loop (this coroutine previously called
        # subprocess.run directly; get_video_info already used to_thread).
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=30,
        )
        if result.returncode == 0:
            # ValueError covers empty/garbage stdout from float().
            return float(result.stdout.strip())
    except (subprocess.SubprocessError, OSError, ValueError):
        pass

    return None
|
||||
|
||||
|
||||
async def get_video_info(video_path: str) -> Optional[dict]:
    """Probe a video file for duration and resolution with ffprobe.

    Args:
        video_path: Path to the media file.

    Returns:
        Dict possibly containing "duration" (float, from the container
        format) and "width"/"height" (from the first video stream), or
        None when probing fails or yields nothing.
    """
    import json as json_module

    probe_cmd = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=width,height,duration:format=duration",
        "-of", "json",
        video_path
    ]

    try:
        # Blocking subprocess call is pushed to a worker thread.
        proc = await asyncio.to_thread(
            subprocess.run,
            probe_cmd,
            capture_output=True,
            text=True,
            timeout=30,
        )
        if proc.returncode == 0:
            parsed = json_module.loads(proc.stdout)
            info = {}

            # Container-level duration is more reliable than stream duration.
            if "format" in parsed and "duration" in parsed["format"]:
                info["duration"] = float(parsed["format"]["duration"])

            # Resolution comes from the first (selected) video stream.
            if "streams" in parsed and len(parsed["streams"]) > 0:
                first_stream = parsed["streams"][0]
                info["width"] = first_stream.get("width")
                info["height"] = first_stream.get("height")

            if info:
                return info
    except Exception:
        pass

    return None
|
||||
|
||||
|
||||
async def trim_video(
    input_path: str,
    output_path: str,
    start_time: float,
    end_time: float,
) -> Tuple[bool, str]:
    """
    Trim video to specified time range.

    Re-encodes (libx264/aac) so the cut is frame-accurate, which is slower
    than a stream copy but avoids keyframe-aligned imprecision.

    Args:
        input_path: Path to input video
        output_path: Path for output video
        start_time: Start time in seconds (clamped to 0)
        end_time: End time in seconds (clamped to the source duration)

    Returns:
        Tuple of (success, message)
    """
    if not os.path.exists(input_path):
        return False, f"Input video not found: {input_path}"

    # Validate time range
    duration = await get_video_duration(input_path)
    if duration is None:
        return False, "Could not get video duration"

    if start_time < 0:
        start_time = 0
    if end_time > duration:
        end_time = duration
    if start_time >= end_time:
        return False, f"Invalid time range: start ({start_time}) >= end ({end_time})"

    # Guard against a bare filename: os.makedirs("") raises.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    trim_duration = end_time - start_time

    # Log trim parameters for debugging
    print(f"[Trim] Input: {input_path}")
    print(f"[Trim] Original duration: {duration:.3f}s")
    print(f"[Trim] Requested: start={start_time:.3f}s, end={end_time:.3f}s")
    print(f"[Trim] Output duration should be: {trim_duration:.3f}s")

    # Use -ss BEFORE -i for input seeking (faster and more reliable for end trimming)
    # Combined with -t for accurate duration control
    # -accurate_seek ensures frame-accurate seeking
    cmd = [
        "ffmpeg", "-y",
        "-accurate_seek",                    # Enable accurate seeking
        "-ss", str(start_time),              # Input seeking (before -i)
        "-i", input_path,
        "-t", str(trim_duration),            # Duration of output
        "-c:v", "libx264",                   # Re-encode video for accurate cut
        "-preset", "fast",                   # Fast encoding preset
        "-crf", "18",                        # High quality (lower = better)
        "-c:a", "aac",                       # Re-encode audio
        "-b:a", "128k",                      # Audio bitrate
        "-avoid_negative_ts", "make_zero",   # Fix timestamp issues
        output_path
    ]

    print(f"[Trim] Command: {' '.join(cmd)}")

    try:
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=120,
        )

        if result.returncode != 0:
            error_msg = result.stderr[-300:] if result.stderr else "Unknown error"
            print(f"[Trim] FFmpeg error: {error_msg}")
            return False, f"Trim failed: {error_msg}"

        if os.path.exists(output_path):
            new_duration = await get_video_duration(output_path)
            if new_duration is None:
                # Output exists but ffprobe could not read it back.
                # Previously this crashed on the f-string format spec and
                # reported a successful trim as a failure.
                print("[Trim] Warning: could not verify output duration")
                return True, "Video trimmed successfully"
            print(f"[Trim] Success! New duration: {new_duration:.3f}s (expected: {trim_duration:.3f}s)")
            print(f"[Trim] Difference from expected: {abs(new_duration - trim_duration):.3f}s")
            return True, f"Video trimmed successfully ({new_duration:.1f}s)"
        else:
            print("[Trim] Error: Output file not created")
            return False, "Output file not created"

    except subprocess.TimeoutExpired:
        print("[Trim] Error: Timeout")
        return False, "Trim operation timed out"
    except Exception as e:
        print(f"[Trim] Error: {str(e)}")
        return False, f"Trim error: {str(e)}"
|
||||
|
||||
|
||||
async def extract_frame(
    video_path: str,
    output_path: str,
    timestamp: float,
) -> Tuple[bool, str]:
    """
    Extract a single frame from video at specified timestamp.

    Args:
        video_path: Path to input video
        output_path: Path for output image (jpg/png)
        timestamp: Time in seconds

    Returns:
        Tuple of (success, message)
    """
    if not os.path.exists(video_path):
        return False, f"Video not found: {video_path}"

    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # -ss before -i seeks on the input; -q:v 2 keeps high JPEG quality.
    grab_cmd = [
        "ffmpeg", "-y",
        "-ss", str(timestamp),
        "-i", video_path,
        "-frames:v", "1",
        "-q:v", "2",
        output_path
    ]

    try:
        proc = await asyncio.to_thread(
            subprocess.run,
            grab_cmd,
            capture_output=True,
            text=True,
            timeout=30,
        )
    except Exception as exc:
        return False, str(exc)

    if proc.returncode == 0 and os.path.exists(output_path):
        return True, "Frame extracted"
    return False, proc.stderr[-200:] if proc.stderr else "Unknown error"
|
||||
|
||||
|
||||
async def get_audio_duration(audio_path: str) -> Optional[float]:
    """Get audio duration in seconds."""
    # The ffprobe invocation in get_video_duration is format-agnostic,
    # so audio files can be probed with the exact same command.
    return await get_video_duration(audio_path)
|
||||
|
||||
|
||||
async def extract_audio(video_path: str, output_path: str) -> Tuple[bool, str]:
    """Extract audio from video as 16 kHz mono PCM WAV (Whisper's optimal input).

    Args:
        video_path: Path to input video
        output_path: Path for output audio file

    Returns:
        Tuple of (success, message)
    """
    # Fail fast with a clear message, matching the sibling extractors.
    if not os.path.exists(video_path):
        return False, f"Video file not found: {video_path}"

    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vn",                    # No video
        "-acodec", "pcm_s16le",   # PCM format for Whisper
        "-ar", "16000",           # 16kHz sample rate
        "-ac", "1",               # Mono
        output_path
    ]

    try:
        # Run in a worker thread: this is an async function and the
        # previous direct subprocess.run blocked the event loop.
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=120,
        )
        if result.returncode == 0:
            return True, "Audio extracted"
        return False, result.stderr
    except Exception as e:
        return False, str(e)
|
||||
|
||||
|
||||
async def extract_audio_with_noise_reduction(
    video_path: str,
    output_path: str,
    noise_reduction_level: str = "medium"
) -> Tuple[bool, str]:
    """
    Extract audio from video with noise reduction for better STT accuracy.

    Args:
        video_path: Path to input video
        output_path: Path for output audio (WAV format recommended)
        noise_reduction_level: "light", "medium", or "heavy"

    Returns:
        Tuple of (success, message)
    """
    if not os.path.exists(video_path):
        return False, f"Video file not found: {video_path}"

    # Base band-pass: drop rumble below 80 Hz and hiss above 8 kHz
    # (speech content lives comfortably inside that band).
    chain = ["highpass=f=80", "lowpass=f=8000"]

    if noise_reduction_level == "medium":
        # FFT denoiser: nf=noise floor, nr=reduction amount, nt=w
        chain.append("afftdn=nf=-25:nr=10:nt=w")
    elif noise_reduction_level == "heavy":
        # More aggressive denoising plus compression to even out levels
        chain.append("afftdn=nf=-20:nr=20:nt=w")
        chain.append("acompressor=threshold=-20dB:ratio=4:attack=5:release=50")
    # "light" keeps only the band-pass filtering

    # EBU R128 loudness normalization as the final stage
    chain.append("loudnorm=I=-16:TP=-1.5:LRA=11")

    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vn",                    # No video
        "-af", ",".join(chain),
        "-acodec", "pcm_s16le",   # PCM format for Whisper
        "-ar", "16000",           # 16kHz sample rate (Whisper optimal)
        "-ac", "1",               # Mono
        output_path
    ]

    try:
        # Run FFmpeg in thread pool to avoid blocking the event loop
        proc = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=120,
        )
    except subprocess.TimeoutExpired:
        return False, "Audio extraction timed out"
    except Exception as exc:
        return False, f"Audio extraction error: {str(exc)}"

    if proc.returncode != 0:
        tail = proc.stderr[-300:] if proc.stderr else "Unknown error"
        return False, f"Audio extraction failed: {tail}"

    if os.path.exists(output_path):
        return True, f"Audio extracted with {noise_reduction_level} noise reduction"
    return False, "Output file not created"
|
||||
|
||||
|
||||
async def analyze_audio_noise_level(audio_path: str) -> Optional[dict]:
    """
    Analyze audio to detect noise level.

    Runs ffmpeg's volumedetect filter (which reports to stderr) and parses
    the measurements out of the log.

    Returns:
        dict with "mean_volume" / "max_volume" in dB, or None on any failure.
    """
    cmd = [
        "ffmpeg",
        "-i", audio_path,
        "-af", "volumedetect",
        "-f", "null",
        "-"
    ]

    try:
        # Off-load the blocking subprocess call: this async function
        # previously blocked the event loop by calling subprocess.run directly.
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=60,
        )
        stderr = result.stderr

        # Parse volume detection output; lines look like
        # "[Parsed_volumedetect_0 @ ...] mean_volume: -23.4 dB".
        # Use the LAST colon so a colon in the log prefix cannot break parsing.
        info = {}
        for line in stderr.split('\n'):
            if 'mean_volume' in line:
                info['mean_volume'] = float(line.split(':')[-1].strip().replace(' dB', ''))
            elif 'max_volume' in line:
                info['max_volume'] = float(line.split(':')[-1].strip().replace(' dB', ''))

        return info if info else None

    except Exception:
        return None
|
||||
|
||||
|
||||
async def has_audio_stream(video_path: str) -> bool:
    """
    Check if video file has an audio stream.

    Returns:
        True if video has audio, False otherwise (including probe errors
        or a missing file).
    """
    cmd = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "a",  # Select only audio streams
        "-show_entries", "stream=codec_type",
        "-of", "csv=p=0",
        video_path
    ]

    try:
        # Run in a worker thread: the previous direct subprocess.run call
        # blocked the event loop inside this async function.
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=30,
        )
        # If there's audio, ffprobe will output "audio"
        return "audio" in result.stdout.lower()
    except Exception:
        return False
|
||||
|
||||
|
||||
async def get_audio_volume_info(video_path: str) -> Optional[dict]:
    """
    Get audio volume information to detect silent audio.

    Returns:
        dict with mean_volume (and max_volume) in dB, or None if the file
        has no audio stream or the measurement fails.
    """
    # First check if audio stream exists
    if not await has_audio_stream(video_path):
        return None

    cmd = [
        "ffmpeg",
        "-i", video_path,
        "-af", "volumedetect",
        "-f", "null",
        "-"
    ]

    try:
        # Run in a worker thread so this async function does not block the
        # event loop (the original called subprocess.run directly).
        result = await asyncio.to_thread(
            subprocess.run,
            cmd,
            capture_output=True,
            text=True,
            timeout=60,
        )

        # volumedetect reports to stderr, e.g. "... mean_volume: -23.4 dB".
        # Split on the LAST colon so a colon in the log prefix cannot break parsing.
        info = {}
        for line in result.stderr.split('\n'):
            if 'mean_volume' in line:
                info['mean_volume'] = float(line.split(':')[-1].strip().replace(' dB', ''))
            elif 'max_volume' in line:
                info['max_volume'] = float(line.split(':')[-1].strip().replace(' dB', ''))

        return info if info else None

    except Exception:
        return None
|
||||
|
||||
|
||||
def is_audio_silent(volume_info: Optional[dict], threshold_db: float = -50.0) -> bool:
    """
    Check if audio is effectively silent (below threshold).

    Args:
        volume_info: dict from get_audio_volume_info
        threshold_db: Volume below this is considered silent (default -50dB)

    Returns:
        True if silent or no audio, False otherwise
    """
    # No measurement at all (missing stream / probe failure) counts as silent.
    if not volume_info:
        return True

    # Default of -100 dB means a dict missing the key is also treated as silent.
    return volume_info.get('mean_volume', -100) < threshold_db
|
||||
Reference in New Issue
Block a user