Initial commit: YouTube Shorts maker application
Features: - Video download from TikTok/Douyin using yt-dlp - Audio transcription with OpenAI Whisper - GPT-4 translation (direct/summarize/rewrite modes) - Subtitle generation with ASS format - Video trimming with frame-accurate preview - BGM integration with volume control - Intro text overlay support - Thumbnail generation with text overlay Tech stack: - Backend: FastAPI, Python 3.11+ - Frontend: React, Vite, TailwindCSS - Video processing: FFmpeg - AI: OpenAI Whisper, GPT-4 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
295
backend/app/services/bgm_recommender.py
Normal file
295
backend/app/services/bgm_recommender.py
Normal file
@@ -0,0 +1,295 @@
|
||||
"""
|
||||
BGM Recommender Service
|
||||
|
||||
Analyzes script content and recommends appropriate BGM based on mood/tone.
|
||||
Uses GPT to analyze the emotional tone and suggests matching music.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import List, Tuple, Optional
|
||||
from openai import OpenAI
|
||||
from pydantic import BaseModel
|
||||
from app.config import settings
|
||||
from app.models.schemas import TranscriptSegment
|
||||
|
||||
|
||||
class BGMRecommendation(BaseModel):
    """Result of a BGM recommendation, optionally tied to a library track."""

    # Mood label detected for the script.
    mood: str
    # Energy level of the recommendation: "low", "medium" or "high".
    energy: str
    # Music genres suggested for the mood.
    suggested_genres: List[str]
    # Keywords usable for searching a matching track.
    search_keywords: List[str]
    # Explanation of why this mood was chosen.
    reasoning: str
    # ID of the matching track when one was found in the local library.
    matched_bgm_id: Optional[str] = None
|
||||
|
||||
|
||||
# Lookup table: mood label -> suggested genres, search keywords and the
# energy level associated with that mood.
MOOD_BGM_MAPPING = {
    # High-energy moods
    "upbeat": {
        "genres": ["pop", "electronic", "dance"],
        "keywords": ["upbeat", "energetic", "happy", "positive"],
        "energy": "high",
    },
    # Low-energy moods
    "chill": {
        "genres": ["lofi", "ambient", "acoustic"],
        "keywords": ["chill", "relaxing", "calm", "peaceful"],
        "energy": "low",
    },
    "dramatic": {
        "genres": ["cinematic", "orchestral", "epic"],
        "keywords": ["dramatic", "epic", "intense", "cinematic"],
        "energy": "high",
    },
    "funny": {
        "genres": ["comedy", "quirky", "playful"],
        "keywords": ["funny", "quirky", "comedy", "playful"],
        "energy": "medium",
    },
    "emotional": {
        "genres": ["piano", "strings", "ballad"],
        "keywords": ["emotional", "sad", "touching", "heartfelt"],
        "energy": "low",
    },
    "informative": {
        "genres": ["corporate", "background", "minimal"],
        "keywords": ["corporate", "background", "tech", "modern"],
        "energy": "medium",
    },
    "exciting": {
        "genres": ["rock", "action", "sports"],
        "keywords": ["exciting", "action", "sports", "adventure"],
        "energy": "high",
    },
    "mysterious": {
        "genres": ["ambient", "dark", "suspense"],
        "keywords": ["mysterious", "suspense", "dark", "tension"],
        "energy": "medium",
    },
}
|
||||
|
||||
|
||||
def _strip_markdown_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    if not text.startswith("```"):
        return text
    # "```json\n{...}\n```".split("```") -> ["", "json\n{...}\n", ""]
    fenced = text.split("```")[1]
    return fenced.removeprefix("json").strip()


async def analyze_script_mood(
    segments: List[TranscriptSegment],
    use_translated: bool = True,
) -> Tuple[bool, str, Optional[BGMRecommendation]]:
    """
    Analyze script content to determine mood and recommend BGM.

    Args:
        segments: Transcript segments (original or translated)
        use_translated: Prefer each segment's ``translated`` text when it is
            non-empty; otherwise the segment's ``text`` is used

    Returns:
        Tuple of (success, message, recommendation). On failure ``success``
        is False and ``recommendation`` is None. Errors raised while calling
        the API or parsing its reply are reported through the tuple instead
        of being raised.
    """
    # Must be imported before the ``try`` below. A function-scope import
    # makes ``json`` a local name, so if the API call failed before an
    # in-``try`` import ran, evaluating ``except json.JSONDecodeError`` would
    # itself raise UnboundLocalError and hide the real error.
    import json

    if not settings.OPENAI_API_KEY:
        return False, "OpenAI API key not configured", None

    if not segments:
        return False, "No transcript segments provided", None

    # Combine script text, one segment per line
    script_text = "\n".join(
        seg.translated if use_translated and seg.translated else seg.text
        for seg in segments
    )

    try:
        client = OpenAI(api_key=settings.OPENAI_API_KEY)

        # NOTE(review): this is the synchronous client, so the call blocks
        # until the API responds even though the function is ``async``.
        response = client.chat.completions.create(
            model=settings.OPENAI_MODEL,
            messages=[
                {
                    "role": "system",
                    "content": """You are a music supervisor for YouTube Shorts.
Analyze the script and determine the best background music mood.

Respond in JSON format ONLY:
{
    "mood": "one of: upbeat, chill, dramatic, funny, emotional, informative, exciting, mysterious",
    "energy": "low, medium, or high",
    "reasoning": "brief explanation in Korean (1 sentence)"
}

Consider:
- Overall emotional tone of the content
- Pacing and energy level
- Target audience engagement
- What would make viewers watch till the end"""
                },
                {
                    "role": "user",
                    "content": f"Script:\n{script_text}"
                }
            ],
            temperature=0.3,
            max_tokens=200,
        )

        # ``content`` can be None; treat that as an empty (unparseable) reply
        # so it is reported as a parse failure rather than an AttributeError.
        result_text = (response.choices[0].message.content or "").strip()
        result = json.loads(_strip_markdown_fence(result_text))

        if not isinstance(result, dict):
            return False, "Failed to parse mood analysis: expected a JSON object", None

        # Normalize case/whitespace so e.g. "Upbeat" still hits the mapping;
        # a missing or blank mood falls back to "upbeat".
        mood = str(result.get("mood") or "").strip().lower() or "upbeat"
        energy = result.get("energy", "medium")
        reasoning = result.get("reasoning", "")

        # Get BGM suggestions based on mood; unknown moods reuse the
        # "upbeat" genres and keywords.
        mood_info = MOOD_BGM_MAPPING.get(mood, MOOD_BGM_MAPPING["upbeat"])

        recommendation = BGMRecommendation(
            mood=mood,
            energy=energy,
            suggested_genres=mood_info["genres"],
            search_keywords=mood_info["keywords"],
            reasoning=reasoning,
        )

        return True, f"Mood analysis complete: {mood}", recommendation

    except json.JSONDecodeError as e:
        return False, f"Failed to parse mood analysis: {str(e)}", None
    except Exception as e:
        # Service boundary: report any other failure through the result tuple.
        return False, f"Mood analysis error: {str(e)}", None
|
||||
|
||||
|
||||
async def find_matching_bgm(
    recommendation: BGMRecommendation,
    available_bgm: List[dict],
) -> Optional[str]:
    """
    Find a matching BGM from available library based on recommendation.

    Each library entry scores one point per keyword that occurs
    (case-insensitively) as a substring of its name or id. The entry with
    the highest score wins; on a tie the earliest entry is kept.

    Args:
        recommendation: BGM recommendation from mood analysis
        available_bgm: List of available BGM info dicts with 'id' and 'name'

    Returns:
        BGM ID if found, None otherwise
    """
    if not available_bgm:
        return None

    # The mood is appended even when it already appears in search_keywords,
    # in which case it counts twice. Blank keywords are dropped because ""
    # is a substring of every string and would make every track "match".
    # Lower-casing happens once here rather than once per track.
    keywords = [
        keyword.lower()
        for keyword in recommendation.search_keywords + [recommendation.mood]
        if keyword and keyword.strip()
    ]
    if not keywords:
        return None

    best_match = None
    best_score = 0

    for bgm in available_bgm:
        # Tolerate entries whose 'name'/'id' is missing, None or not a string.
        raw_name = bgm.get("name")
        raw_id = bgm.get("id")
        bgm_name = "" if raw_name is None else str(raw_name).lower()
        bgm_id = "" if raw_id is None else str(raw_id).lower()

        score = sum(
            1 for keyword in keywords if keyword in bgm_name or keyword in bgm_id
        )

        # Strict ">" keeps the first of equally scored entries.
        if score > best_score:
            best_score = score
            best_match = raw_id

    return best_match if best_score > 0 else None
|
||||
|
||||
|
||||
async def recommend_bgm_for_script(
    segments: List[TranscriptSegment],
    available_bgm: List[dict],
    use_translated: bool = True,
) -> Tuple[bool, str, Optional[BGMRecommendation]]:
    """
    Run the full BGM recommendation pipeline for a script.

    The script mood is analyzed first; the resulting recommendation is then
    matched against the local library. When no library track matches, the
    message lists up to three search keywords for external sources.

    Args:
        segments: Transcript segments
        available_bgm: List of available BGM in library
        use_translated: Whether to use translated text

    Returns:
        Tuple of (success, message, recommendation with matched_bgm_id if found)
    """
    ok, status, rec = await analyze_script_mood(segments, use_translated)

    # Mood analysis failed: hand its result back untouched.
    if not (ok and rec):
        return ok, status, rec

    bgm_id = await find_matching_bgm(rec, available_bgm)

    if not bgm_id:
        hints = ", ".join(rec.search_keywords[:3])
        status = f"Mood: {rec.mood} | No local BGM matched, search with: {hints}"
        return True, status, rec

    rec.matched_bgm_id = bgm_id
    status = f"Mood: {rec.mood} | Matched BGM: {bgm_id}"
    return True, status, rec
|
||||
|
||||
|
||||
# Ready-made presets for common content types: each names a mood label and
# the search keywords to use for that kind of content.
BGM_PRESETS = {
    "cooking": {"mood": "chill", "keywords": ["cooking", "food", "kitchen", "cozy"]},
    "fitness": {
        "mood": "upbeat",
        "keywords": ["workout", "fitness", "energetic", "motivation"],
    },
    "tutorial": {
        "mood": "informative",
        "keywords": ["tutorial", "tech", "corporate", "background"],
    },
    "comedy": {"mood": "funny", "keywords": ["funny", "comedy", "quirky", "playful"]},
    "travel": {
        "mood": "exciting",
        "keywords": ["travel", "adventure", "upbeat", "inspiring"],
    },
    "asmr": {"mood": "chill", "keywords": ["asmr", "relaxing", "ambient", "soft"]},
    "news": {
        "mood": "informative",
        "keywords": ["news", "corporate", "serious", "background"],
    },
    "gaming": {
        "mood": "exciting",
        "keywords": ["gaming", "electronic", "action", "intense"],
    },
}
|
||||
|
||||
|
||||
def get_preset_recommendation(content_type: str) -> Optional[BGMRecommendation]:
    """
    Build a BGM recommendation from the preset for a content type.

    The lookup is case-insensitive. Genres and energy come from the mood
    mapping for the preset's mood; search keywords come from the preset.

    Args:
        content_type: Content type name, e.g. "cooking" or "gaming"

    Returns:
        The preset-based recommendation, or None when no preset exists
    """
    entry = BGM_PRESETS.get(content_type.lower())
    if entry:
        preset_mood = entry["mood"]
        # Moods absent from the mapping reuse the "upbeat" profile.
        profile = MOOD_BGM_MAPPING.get(preset_mood, MOOD_BGM_MAPPING["upbeat"])
        return BGMRecommendation(
            mood=preset_mood,
            energy=profile["energy"],
            suggested_genres=profile["genres"],
            search_keywords=entry["keywords"],
            reasoning=f"Preset for {content_type} content",
        )
    return None
|
||||
Reference in New Issue
Block a user