Initial commit: YouTube Shorts maker application

Features:
- Video download from TikTok/Douyin using yt-dlp
- Audio transcription with OpenAI Whisper
- GPT-4 translation (direct/summarize/rewrite modes)
- Subtitle generation with ASS format
- Video trimming with frame-accurate preview
- BGM integration with volume control
- Intro text overlay support
- Thumbnail generation with text overlay

Tech stack:
- Backend: FastAPI, Python 3.11+
- Frontend: React, Vite, TailwindCSS
- Video processing: FFmpeg
- AI: OpenAI Whisper, GPT-4

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
kihong.kim
2026-01-03 21:38:34 +09:00
commit c3795138da
64 changed files with 13059 additions and 0 deletions

View File

@@ -0,0 +1,295 @@
"""
BGM Recommender Service
Analyzes script content and recommends appropriate BGM based on mood/tone.
Uses GPT to analyze the emotional tone and suggests matching music.
"""
import os
from typing import List, Tuple, Optional
from openai import OpenAI
from pydantic import BaseModel
from app.config import settings
from app.models.schemas import TranscriptSegment
class BGMRecommendation(BaseModel):
    """Outcome of a BGM recommendation.

    Bundles the detected mood with the genre and keyword hints derived
    from it, plus the ID of a local library track when one was matched.
    """

    # Mood label detected for the script.
    mood: str
    # Energy level: "low", "medium" or "high".
    energy: str
    # Music genres suggested for the mood.
    suggested_genres: List[str]
    # Keywords usable for searching a BGM source.
    search_keywords: List[str]
    # Short explanation of why this mood was chosen.
    reasoning: str
    # ID of the matching BGM in the local library; None when none was found.
    matched_bgm_id: Optional[str] = None
# Lookup table: mood label -> BGM profile.
# Each profile carries the suggested "genres", the search "keywords" and the
# typical "energy" level ("low" / "medium" / "high") for that mood.
MOOD_BGM_MAPPING = {
    "upbeat": dict(
        genres=["pop", "electronic", "dance"],
        keywords=["upbeat", "energetic", "happy", "positive"],
        energy="high",
    ),
    "chill": dict(
        genres=["lofi", "ambient", "acoustic"],
        keywords=["chill", "relaxing", "calm", "peaceful"],
        energy="low",
    ),
    "dramatic": dict(
        genres=["cinematic", "orchestral", "epic"],
        keywords=["dramatic", "epic", "intense", "cinematic"],
        energy="high",
    ),
    "funny": dict(
        genres=["comedy", "quirky", "playful"],
        keywords=["funny", "quirky", "comedy", "playful"],
        energy="medium",
    ),
    "emotional": dict(
        genres=["piano", "strings", "ballad"],
        keywords=["emotional", "sad", "touching", "heartfelt"],
        energy="low",
    ),
    "informative": dict(
        genres=["corporate", "background", "minimal"],
        keywords=["corporate", "background", "tech", "modern"],
        energy="medium",
    ),
    "exciting": dict(
        genres=["rock", "action", "sports"],
        keywords=["exciting", "action", "sports", "adventure"],
        energy="high",
    ),
    "mysterious": dict(
        genres=["ambient", "dark", "suspense"],
        keywords=["mysterious", "suspense", "dark", "tension"],
        energy="medium",
    ),
}
async def analyze_script_mood(
    segments: List[TranscriptSegment],
    use_translated: bool = True,
) -> Tuple[bool, str, Optional[BGMRecommendation]]:
    """
    Analyze script content to determine mood and recommend BGM.

    The script text is sent to the configured OpenAI chat model, which is
    asked for a JSON object with "mood", "energy" and "reasoning". The mood
    is then looked up in MOOD_BGM_MAPPING to obtain genres and keywords; an
    unrecognised mood falls back to the "upbeat" profile.

    Args:
        segments: Transcript segments (original or translated)
        use_translated: Whether to use translated text (falls back to the
            original text for segments without a translation)

    Returns:
        Tuple of (success, message, recommendation). API and parsing errors
        raised inside the request/parse step are reported as
        (False, message, None) rather than raised.
    """
    # Imported before the ``try`` on purpose: a function-level import makes
    # ``json`` a local name, so it has to be bound before the
    # ``except json.JSONDecodeError`` clause is evaluated. Otherwise any
    # earlier failure (e.g. the API call) surfaces as UnboundLocalError.
    import asyncio
    import json

    if not settings.OPENAI_API_KEY:
        return False, "OpenAI API key not configured", None
    if not segments:
        return False, "No transcript segments provided", None

    # Combine script text, preferring the translation when requested.
    script_text = "\n".join(
        seg.translated if use_translated and seg.translated else seg.text
        for seg in segments
    )

    try:
        client = OpenAI(api_key=settings.OPENAI_API_KEY)
        # The client call is synchronous; run it in a worker thread so this
        # coroutine does not block the event loop while waiting on the API.
        response = await asyncio.to_thread(
            client.chat.completions.create,
            model=settings.OPENAI_MODEL,
            messages=[
                {
                    "role": "system",
                    "content": """You are a music supervisor for YouTube Shorts.
Analyze the script and determine the best background music mood.
Respond in JSON format ONLY:
{
"mood": "one of: upbeat, chill, dramatic, funny, emotional, informative, exciting, mysterious",
"energy": "low, medium, or high",
"reasoning": "brief explanation in Korean (1 sentence)"
}
Consider:
- Overall emotional tone of the content
- Pacing and energy level
- Target audience engagement
- What would make viewers watch till the end"""
                },
                {
                    "role": "user",
                    "content": f"Script:\n{script_text}"
                }
            ],
            temperature=0.3,
            max_tokens=200,
        )

        # ``content`` can be None; treat that as an empty (unparseable) reply.
        result_text = (response.choices[0].message.content or "").strip()

        # Clean up JSON if wrapped in a markdown code fence.
        if result_text.startswith("```"):
            result_text = result_text.split("```")[1]
            if result_text.startswith("json"):
                result_text = result_text[4:]

        result = json.loads(result_text)
        if not isinstance(result, dict):
            return False, "Failed to parse mood analysis: expected a JSON object", None

        # Normalise case/whitespace so e.g. "Chill" still hits its own profile
        # instead of silently falling back to "upbeat".
        mood = str(result.get("mood") or "upbeat").strip().lower()
        mood_info = MOOD_BGM_MAPPING.get(mood, MOOD_BGM_MAPPING["upbeat"])

        # When the model omits the energy, use the profile's own energy.
        energy = str(result.get("energy") or mood_info["energy"]).strip().lower()
        reasoning = str(result.get("reasoning") or "")

        recommendation = BGMRecommendation(
            mood=mood,
            energy=energy,
            suggested_genres=mood_info["genres"],
            search_keywords=mood_info["keywords"],
            reasoning=reasoning,
        )
        return True, f"Mood analysis complete: {mood}", recommendation
    except json.JSONDecodeError as e:
        return False, f"Failed to parse mood analysis: {str(e)}", None
    except Exception as e:
        # Boundary handler: report any API/validation failure to the caller.
        return False, f"Mood analysis error: {str(e)}", None
async def find_matching_bgm(
    recommendation: BGMRecommendation,
    available_bgm: List[dict],
) -> Optional[str]:
    """
    Find a matching BGM from available library based on recommendation.

    Each library entry is scored by how many of the recommendation's
    keywords (search keywords plus the mood itself) occur as a
    case-insensitive substring of its name or id. The highest-scoring entry
    wins; ties keep the earliest entry.

    Args:
        recommendation: BGM recommendation from mood analysis
        available_bgm: List of available BGM info dicts with 'id' and 'name'

    Returns:
        The 'id' of the best-scoring entry, or None when the library is
        empty or nothing matches
    """
    if not available_bgm:
        return None

    # Lower-case once up front. Empty keywords are dropped because "" is a
    # substring of every string and would make every entry "match".
    # Duplicates are kept, so a keyword listed twice still counts twice.
    keywords = [
        str(keyword).lower()
        for keyword in (*recommendation.search_keywords, recommendation.mood)
        if keyword
    ]
    if not keywords:
        return None

    best_match = None
    best_score = 0
    for bgm in available_bgm:
        bgm_id = bgm.get("id")
        # An entry without an id cannot be returned, so it must not win.
        if bgm_id is None or bgm_id == "":
            continue
        # Tolerate a missing/None name and non-string ids.
        name_text = str(bgm.get("name") or "").lower()
        id_text = str(bgm_id).lower()

        score = sum(
            1 for keyword in keywords
            if keyword in name_text or keyword in id_text
        )
        if score > best_score:
            best_score = score
            best_match = bgm_id

    return best_match
async def recommend_bgm_for_script(
    segments: List[TranscriptSegment],
    available_bgm: List[dict],
    use_translated: bool = True,
) -> Tuple[bool, str, Optional[BGMRecommendation]]:
    """
    Run the end-to-end BGM recommendation pipeline for a script.

    The script mood is analyzed first; the resulting recommendation is then
    checked against the local BGM library. On a hit the recommendation's
    matched_bgm_id is filled in, otherwise the message lists the first three
    search keywords for use with external sources.

    Args:
        segments: Transcript segments
        available_bgm: List of available BGM in library
        use_translated: Whether to use translated text

    Returns:
        Tuple of (success, message, recommendation with matched_bgm_id if found)
    """
    # Stage 1: mood analysis; pass its result straight through on failure.
    ok, status, rec = await analyze_script_mood(segments, use_translated)
    if not (ok and rec):
        return ok, status, rec

    # Stage 2: look for a local library track fitting the recommendation.
    library_hit = await find_matching_bgm(rec, available_bgm)
    if not library_hit:
        hint = ", ".join(rec.search_keywords[:3])
        status = f"Mood: {rec.mood} | No local BGM matched, search with: {hint}"
        return True, status, rec

    rec.matched_bgm_id = library_hit
    status = f"Mood: {rec.mood} | Matched BGM: {library_hit}"
    return True, status, rec
# Ready-made BGM presets keyed by common content type.
# Each preset names a "mood" and the search "keywords" to use for that
# kind of content.
BGM_PRESETS = {
    "cooking": dict(
        mood="chill",
        keywords=["cooking", "food", "kitchen", "cozy"],
    ),
    "fitness": dict(
        mood="upbeat",
        keywords=["workout", "fitness", "energetic", "motivation"],
    ),
    "tutorial": dict(
        mood="informative",
        keywords=["tutorial", "tech", "corporate", "background"],
    ),
    "comedy": dict(
        mood="funny",
        keywords=["funny", "comedy", "quirky", "playful"],
    ),
    "travel": dict(
        mood="exciting",
        keywords=["travel", "adventure", "upbeat", "inspiring"],
    ),
    "asmr": dict(
        mood="chill",
        keywords=["asmr", "relaxing", "ambient", "soft"],
    ),
    "news": dict(
        mood="informative",
        keywords=["news", "corporate", "serious", "background"],
    ),
    "gaming": dict(
        mood="exciting",
        keywords=["gaming", "electronic", "action", "intense"],
    ),
}
def get_preset_recommendation(content_type: str) -> Optional[BGMRecommendation]:
    """Build a BGM recommendation from the preset for a content type.

    The content type is matched case-insensitively against BGM_PRESETS.
    Energy and genres come from the preset mood's entry in MOOD_BGM_MAPPING
    (falling back to the "upbeat" entry), while the search keywords come
    from the preset itself. Returns None when no preset exists.
    """
    preset = BGM_PRESETS.get(content_type.lower())
    if preset:
        preset_mood = preset["mood"]
        profile = MOOD_BGM_MAPPING.get(preset_mood, MOOD_BGM_MAPPING["upbeat"])
        fields = {
            "mood": preset_mood,
            "energy": profile["energy"],
            "suggested_genres": profile["genres"],
            "search_keywords": preset["keywords"],
            "reasoning": f"Preset for {content_type} content",
        }
        return BGMRecommendation(**fields)
    return None