Initial commit: YouTube Shorts maker application

Features:
- Video download from TikTok/Douyin using yt-dlp
- Audio transcription with OpenAI Whisper
- GPT-4 translation (direct/summarize/rewrite modes)
- Subtitle generation with ASS format
- Video trimming with frame-accurate preview
- BGM integration with volume control
- Intro text overlay support
- Thumbnail generation with text overlay

Tech stack:
- Backend: FastAPI, Python 3.11+
- Frontend: React, Vite, TailwindCSS
- Video processing: FFmpeg
- AI: OpenAI Whisper, GPT-4

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-03 21:38:34 +09:00
commit c3795138da
64 changed files with 13059 additions and 0 deletions
--- a/backend/app/services/bgm_recommender.py
+++ b/backend/app/services/bgm_recommender.py
@@ -0,0 +1,295 @@
+"""
+BGM Recommender Service
+
+Analyzes script content and recommends appropriate BGM based on mood/tone.
+Uses GPT to analyze the emotional tone and suggests matching music.
+"""
+
+import os
+from typing import List, Tuple, Optional
+from openai import OpenAI
+from pydantic import BaseModel
+from app.config import settings
+from app.models.schemas import TranscriptSegment
+
+
+class BGMRecommendation(BaseModel):
+    """
+    BGM recommendation result.
+
+    Built either from the GPT mood analysis of a script or from a
+    content-type preset. ``matched_bgm_id`` keeps its default of ``None``
+    unless a track from the local library has been matched.
+    """
+    mood: str  # detected mood; normally one of the MOOD_BGM_MAPPING keys
+    energy: str  # low, medium, high
+    suggested_genres: List[str]  # genres taken from the mood's MOOD_BGM_MAPPING entry
+    search_keywords: List[str]  # terms for matching library tracks or searching elsewhere
+    reasoning: str  # short explanation of why this mood was chosen
+    matched_bgm_id: Optional[str] = None  # if found in local library
+
+
+# Mood to BGM mapping.
+# Keys are the mood labels the GPT prompt in analyze_script_mood() asks the
+# model to choose from. Each entry provides:
+#   genres   - music genres suggested for that mood
+#   keywords - search terms used when matching tracks by name/id
+#   energy   - default energy level ("low", "medium" or "high"); read by
+#              get_preset_recommendation()
+# The "upbeat" entry doubles as the fallback for unknown moods.
+MOOD_BGM_MAPPING = {
+    "upbeat": {
+        "genres": ["pop", "electronic", "dance"],
+        "keywords": ["upbeat", "energetic", "happy", "positive"],
+        "energy": "high",
+    },
+    "chill": {
+        "genres": ["lofi", "ambient", "acoustic"],
+        "keywords": ["chill", "relaxing", "calm", "peaceful"],
+        "energy": "low",
+    },
+    "dramatic": {
+        "genres": ["cinematic", "orchestral", "epic"],
+        "keywords": ["dramatic", "epic", "intense", "cinematic"],
+        "energy": "high",
+    },
+    "funny": {
+        "genres": ["comedy", "quirky", "playful"],
+        "keywords": ["funny", "quirky", "comedy", "playful"],
+        "energy": "medium",
+    },
+    "emotional": {
+        "genres": ["piano", "strings", "ballad"],
+        "keywords": ["emotional", "sad", "touching", "heartfelt"],
+        "energy": "low",
+    },
+    "informative": {
+        "genres": ["corporate", "background", "minimal"],
+        "keywords": ["corporate", "background", "tech", "modern"],
+        "energy": "medium",
+    },
+    "exciting": {
+        "genres": ["rock", "action", "sports"],
+        "keywords": ["exciting", "action", "sports", "adventure"],
+        "energy": "high",
+    },
+    "mysterious": {
+        "genres": ["ambient", "dark", "suspense"],
+        "keywords": ["mysterious", "suspense", "dark", "tension"],
+        "energy": "medium",
+    },
+}
+
+
+def _strip_markdown_fence(text: str) -> str:
+    """Return *text* without a surrounding Markdown code fence, if it has one."""
+    text = text.strip()
+    if not text.startswith("```"):
+        return text
+    # Everything between the opening fence and the next fence marker.
+    inner = text.split("```")[1]
+    # Drop an optional language tag ("json", "JSON", ...) after the fence.
+    if inner[:4].lower() == "json":
+        inner = inner[4:]
+    return inner.strip()
+
+
+def _normalized_label(value: object, default: str) -> str:
+    """Return *value* stripped and lower-cased, or *default* if it is not a non-empty string."""
+    if isinstance(value, str) and value.strip():
+        return value.strip().lower()
+    return default
+
+
+async def analyze_script_mood(
+    segments: List[TranscriptSegment],
+    use_translated: bool = True,
+) -> Tuple[bool, str, Optional[BGMRecommendation]]:
+    """
+    Analyze script content to determine mood and recommend BGM.
+
+    The script text is sent to the configured OpenAI chat model, which is
+    asked to answer with a JSON object holding ``mood``, ``energy`` and
+    ``reasoning``. Genres and search keywords are then looked up in
+    MOOD_BGM_MAPPING; a mood that is not in the mapping gets the "upbeat"
+    suggestions.
+
+    Args:
+        segments: Transcript segments (original or translated)
+        use_translated: Whether to prefer each segment's translated text
+            (segments without a translation fall back to the original text)
+
+    Returns:
+        Tuple of (success, message, recommendation). Failures (missing API
+        key, no segments, API errors, unparsable answers) are reported as
+        ``(False, message, None)`` instead of raising.
+    """
+    # Imported before the ``try`` on purpose: an import inside a function
+    # binds a local name, so ``except json.JSONDecodeError`` must not be
+    # reachable before the import has executed (it would otherwise raise
+    # UnboundLocalError whenever the API call itself fails).
+    import asyncio
+    import json
+
+    if not settings.OPENAI_API_KEY:
+        return False, "OpenAI API key not configured", None
+
+    if not segments:
+        return False, "No transcript segments provided", None
+
+    # Combine script text
+    script_text = "\n".join(
+        seg.translated if use_translated and seg.translated else seg.text
+        for seg in segments
+    )
+
+    try:
+        client = OpenAI(api_key=settings.OPENAI_API_KEY)
+
+        # The OpenAI client used here is synchronous; run the request in a
+        # worker thread so the event loop is not blocked while waiting.
+        response = await asyncio.to_thread(
+            client.chat.completions.create,
+            model=settings.OPENAI_MODEL,
+            messages=[
+                {
+                    "role": "system",
+                    "content": """You are a music supervisor for YouTube Shorts.
+Analyze the script and determine the best background music mood.
+
+Respond in JSON format ONLY:
+{
+    "mood": "one of: upbeat, chill, dramatic, funny, emotional, informative, exciting, mysterious",
+    "energy": "low, medium, or high",
+    "reasoning": "brief explanation in Korean (1 sentence)"
+}
+
+Consider:
+- Overall emotional tone of the content
+- Pacing and energy level
+- Target audience engagement
+- What would make viewers watch till the end"""
+                },
+                {
+                    "role": "user",
+                    "content": f"Script:\n{script_text}"
+                }
+            ],
+            temperature=0.3,
+            max_tokens=200,
+        )
+
+        # The message content may be missing; report that explicitly.
+        content = response.choices[0].message.content
+        if not content:
+            return False, "Mood analysis error: empty response from model", None
+
+        # Parse response (the model sometimes wraps JSON in a Markdown fence)
+        result = json.loads(_strip_markdown_fence(content))
+        if not isinstance(result, dict):
+            return False, "Failed to parse mood analysis: expected a JSON object", None
+
+        # Normalize case/whitespace so e.g. "Upbeat" still hits the mapping.
+        mood = _normalized_label(result.get("mood"), "upbeat")
+        energy = _normalized_label(result.get("energy"), "medium")
+        reasoning = str(result.get("reasoning") or "")
+
+        # Get BGM suggestions based on mood
+        mood_info = MOOD_BGM_MAPPING.get(mood, MOOD_BGM_MAPPING["upbeat"])
+
+        recommendation = BGMRecommendation(
+            mood=mood,
+            energy=energy,
+            suggested_genres=mood_info["genres"],
+            search_keywords=mood_info["keywords"],
+            reasoning=reasoning,
+        )
+
+        return True, f"Mood analysis complete: {mood}", recommendation
+
+    except json.JSONDecodeError as exc:
+        return False, f"Failed to parse mood analysis: {str(exc)}", None
+    except Exception as exc:
+        # Boundary handler: callers expect a failure tuple, not an exception.
+        return False, f"Mood analysis error: {str(exc)}", None
+
+
+async def find_matching_bgm(
+    recommendation: BGMRecommendation,
+    available_bgm: List[dict],
+) -> Optional[str]:
+    """
+    Find a matching BGM from available library based on recommendation.
+
+    Each library entry is scored by how many search terms (the
+    recommendation's keywords plus its mood) occur, case-insensitively, as
+    substrings of the entry's name or id. The highest score wins; on a tie
+    the earliest entry is kept.
+
+    Args:
+        recommendation: BGM recommendation from mood analysis
+        available_bgm: List of available BGM info dicts with 'id' and 'name'.
+            Entries without an 'id' are skipped, since there would be
+            nothing to return for them.
+
+    Returns:
+        The 'id' value of the best-scoring entry, or None when the library
+        is empty or no entry matches any term
+    """
+    if not available_bgm:
+        return None
+
+    # Lower-case the search terms once instead of on every comparison.
+    terms = [
+        term.lower()
+        for term in recommendation.search_keywords + [recommendation.mood]
+    ]
+
+    best_match = None
+    best_score = 0
+
+    for bgm in available_bgm:
+        bgm_id = bgm.get("id")
+        if bgm_id is None:
+            continue
+
+        # str()/``or ""`` tolerate non-string ids and None names.
+        haystacks = (str(bgm.get("name") or "").lower(), str(bgm_id).lower())
+        score = sum(
+            1 for term in terms
+            if any(term in haystack for haystack in haystacks)
+        )
+
+        # Strict ">" keeps the first entry among equally scored ones.
+        if score > best_score:
+            best_score = score
+            best_match = bgm_id
+
+    return best_match if best_score > 0 else None
+
+
+async def recommend_bgm_for_script(
+    segments: List[TranscriptSegment],
+    available_bgm: List[dict],
+    use_translated: bool = True,
+) -> Tuple[bool, str, Optional[BGMRecommendation]]:
+    """
+    Run the full BGM recommendation pipeline for a script.
+
+    The script's mood is analyzed first; the resulting recommendation is
+    then matched against the local BGM library. When nothing in the library
+    matches, the message lists up to three search keywords to use with
+    external sources instead.
+
+    Args:
+        segments: Transcript segments
+        available_bgm: List of available BGM in library
+        use_translated: Whether to use translated text
+
+    Returns:
+        Tuple of (success, message, recommendation). On success the
+        recommendation carries ``matched_bgm_id`` when a library track was
+        found; a failed analysis is passed through unchanged.
+    """
+    ok, status, recommendation = await analyze_script_mood(
+        segments, use_translated
+    )
+
+    # Analysis failed or produced nothing: hand its outcome back untouched.
+    if not (ok and recommendation):
+        return ok, status, recommendation
+
+    library_match = await find_matching_bgm(recommendation, available_bgm)
+
+    if not library_match:
+        fallback_terms = ', '.join(recommendation.search_keywords[:3])
+        return (
+            True,
+            f"Mood: {recommendation.mood} | No local BGM matched, search with: {fallback_terms}",
+            recommendation,
+        )
+
+    recommendation.matched_bgm_id = library_match
+    return (
+        True,
+        f"Mood: {recommendation.mood} | Matched BGM: {library_match}",
+        recommendation,
+    )
+
+
+# Predefined BGM presets for common content types.
+# Keys are lower-case content-type names (get_preset_recommendation()
+# lower-cases its argument before the lookup). Each preset provides:
+#   mood     - a MOOD_BGM_MAPPING key; supplies the genres and energy
+#   keywords - preset-specific search terms, used instead of the mood's
+#              own keywords
+BGM_PRESETS = {
+    "cooking": {
+        "mood": "chill",
+        "keywords": ["cooking", "food", "kitchen", "cozy"],
+    },
+    "fitness": {
+        "mood": "upbeat",
+        "keywords": ["workout", "fitness", "energetic", "motivation"],
+    },
+    "tutorial": {
+        "mood": "informative",
+        "keywords": ["tutorial", "tech", "corporate", "background"],
+    },
+    "comedy": {
+        "mood": "funny",
+        "keywords": ["funny", "comedy", "quirky", "playful"],
+    },
+    "travel": {
+        "mood": "exciting",
+        "keywords": ["travel", "adventure", "upbeat", "inspiring"],
+    },
+    "asmr": {
+        "mood": "chill",
+        "keywords": ["asmr", "relaxing", "ambient", "soft"],
+    },
+    "news": {
+        "mood": "informative",
+        "keywords": ["news", "corporate", "serious", "background"],
+    },
+    "gaming": {
+        "mood": "exciting",
+        "keywords": ["gaming", "electronic", "action", "intense"],
+    },
+}
+
+
+def get_preset_recommendation(content_type: str) -> Optional[BGMRecommendation]:
+    """
+    Build a BGM recommendation from a predefined content-type preset.
+
+    Args:
+        content_type: Content type name, matched case-insensitively against
+            the BGM_PRESETS keys
+
+    Returns:
+        A recommendation with the preset's mood and keywords plus the genres
+        and energy of that mood's MOOD_BGM_MAPPING entry, or None when no
+        preset exists for the content type
+    """
+    chosen = BGM_PRESETS.get(content_type.lower())
+    if not chosen:
+        return None
+
+    preset_mood = chosen["mood"]
+    # Unknown moods borrow the "upbeat" genres and energy.
+    details = MOOD_BGM_MAPPING.get(preset_mood, MOOD_BGM_MAPPING["upbeat"])
+
+    fields = {
+        "mood": preset_mood,
+        "energy": details["energy"],
+        "suggested_genres": details["genres"],
+        "search_keywords": chosen["keywords"],
+        "reasoning": f"Preset for {content_type} content",
+    }
+    return BGMRecommendation(**fields)