""" BGM Recommender Service Analyzes script content and recommends appropriate BGM based on mood/tone. Uses GPT to analyze the emotional tone and suggests matching music. """ import os from typing import List, Tuple, Optional from openai import OpenAI from pydantic import BaseModel from app.config import settings from app.models.schemas import TranscriptSegment class BGMRecommendation(BaseModel): """BGM recommendation result.""" mood: str # detected mood energy: str # low, medium, high suggested_genres: List[str] search_keywords: List[str] reasoning: str matched_bgm_id: Optional[str] = None # if found in local library # Mood to BGM mapping MOOD_BGM_MAPPING = { "upbeat": { "genres": ["pop", "electronic", "dance"], "keywords": ["upbeat", "energetic", "happy", "positive"], "energy": "high", }, "chill": { "genres": ["lofi", "ambient", "acoustic"], "keywords": ["chill", "relaxing", "calm", "peaceful"], "energy": "low", }, "dramatic": { "genres": ["cinematic", "orchestral", "epic"], "keywords": ["dramatic", "epic", "intense", "cinematic"], "energy": "high", }, "funny": { "genres": ["comedy", "quirky", "playful"], "keywords": ["funny", "quirky", "comedy", "playful"], "energy": "medium", }, "emotional": { "genres": ["piano", "strings", "ballad"], "keywords": ["emotional", "sad", "touching", "heartfelt"], "energy": "low", }, "informative": { "genres": ["corporate", "background", "minimal"], "keywords": ["corporate", "background", "tech", "modern"], "energy": "medium", }, "exciting": { "genres": ["rock", "action", "sports"], "keywords": ["exciting", "action", "sports", "adventure"], "energy": "high", }, "mysterious": { "genres": ["ambient", "dark", "suspense"], "keywords": ["mysterious", "suspense", "dark", "tension"], "energy": "medium", }, } async def analyze_script_mood( segments: List[TranscriptSegment], use_translated: bool = True, ) -> Tuple[bool, str, Optional[BGMRecommendation]]: """ Analyze script content to determine mood and recommend BGM. Args: segments: Transcript segments (original or translated) use_translated: Whether to use translated text Returns: Tuple of (success, message, recommendation) """ if not settings.OPENAI_API_KEY: return False, "OpenAI API key not configured", None if not segments: return False, "No transcript segments provided", None # Combine script text script_text = "\n".join([ seg.translated if use_translated and seg.translated else seg.text for seg in segments ]) try: client = OpenAI(api_key=settings.OPENAI_API_KEY) response = client.chat.completions.create( model=settings.OPENAI_MODEL, messages=[ { "role": "system", "content": """You are a music supervisor for YouTube Shorts. Analyze the script and determine the best background music mood. Respond in JSON format ONLY: { "mood": "one of: upbeat, chill, dramatic, funny, emotional, informative, exciting, mysterious", "energy": "low, medium, or high", "reasoning": "brief explanation in Korean (1 sentence)" } Consider: - Overall emotional tone of the content - Pacing and energy level - Target audience engagement - What would make viewers watch till the end""" }, { "role": "user", "content": f"Script:\n{script_text}" } ], temperature=0.3, max_tokens=200, ) # Parse response import json result_text = response.choices[0].message.content.strip() # Clean up JSON if wrapped in markdown if result_text.startswith("```"): result_text = result_text.split("```")[1] if result_text.startswith("json"): result_text = result_text[4:] result = json.loads(result_text) mood = result.get("mood", "upbeat") energy = result.get("energy", "medium") reasoning = result.get("reasoning", "") # Get BGM suggestions based on mood mood_info = MOOD_BGM_MAPPING.get(mood, MOOD_BGM_MAPPING["upbeat"]) recommendation = BGMRecommendation( mood=mood, energy=energy, suggested_genres=mood_info["genres"], search_keywords=mood_info["keywords"], reasoning=reasoning, ) return True, f"Mood analysis complete: {mood}", recommendation except json.JSONDecodeError as e: return False, f"Failed to parse mood analysis: {str(e)}", None except Exception as e: return False, f"Mood analysis error: {str(e)}", None async def find_matching_bgm( recommendation: BGMRecommendation, available_bgm: List[dict], ) -> Optional[str]: """ Find a matching BGM from available library based on recommendation. Args: recommendation: BGM recommendation from mood analysis available_bgm: List of available BGM info dicts with 'id' and 'name' Returns: BGM ID if found, None otherwise """ if not available_bgm: return None keywords = recommendation.search_keywords + [recommendation.mood] # Score each BGM based on keyword matching best_match = None best_score = 0 for bgm in available_bgm: bgm_name = bgm.get("name", "").lower() bgm_id = bgm.get("id", "").lower() score = 0 for keyword in keywords: if keyword.lower() in bgm_name or keyword.lower() in bgm_id: score += 1 if score > best_score: best_score = score best_match = bgm.get("id") return best_match if best_score > 0 else None async def recommend_bgm_for_script( segments: List[TranscriptSegment], available_bgm: List[dict], use_translated: bool = True, ) -> Tuple[bool, str, Optional[BGMRecommendation]]: """ Complete BGM recommendation workflow: 1. Analyze script mood 2. Find matching BGM from library 3. Return recommendation with search keywords for external sources Args: segments: Transcript segments available_bgm: List of available BGM in library use_translated: Whether to use translated text Returns: Tuple of (success, message, recommendation with matched_bgm_id if found) """ # Step 1: Analyze mood success, message, recommendation = await analyze_script_mood( segments, use_translated ) if not success or not recommendation: return success, message, recommendation # Step 2: Find matching BGM in library matched_id = await find_matching_bgm(recommendation, available_bgm) if matched_id: recommendation.matched_bgm_id = matched_id message = f"Mood: {recommendation.mood} | Matched BGM: {matched_id}" else: message = f"Mood: {recommendation.mood} | No local BGM matched, search with: {', '.join(recommendation.search_keywords[:3])}" return True, message, recommendation # Predefined BGM presets for common content types BGM_PRESETS = { "cooking": { "mood": "chill", "keywords": ["cooking", "food", "kitchen", "cozy"], }, "fitness": { "mood": "upbeat", "keywords": ["workout", "fitness", "energetic", "motivation"], }, "tutorial": { "mood": "informative", "keywords": ["tutorial", "tech", "corporate", "background"], }, "comedy": { "mood": "funny", "keywords": ["funny", "comedy", "quirky", "playful"], }, "travel": { "mood": "exciting", "keywords": ["travel", "adventure", "upbeat", "inspiring"], }, "asmr": { "mood": "chill", "keywords": ["asmr", "relaxing", "ambient", "soft"], }, "news": { "mood": "informative", "keywords": ["news", "corporate", "serious", "background"], }, "gaming": { "mood": "exciting", "keywords": ["gaming", "electronic", "action", "intense"], }, } def get_preset_recommendation(content_type: str) -> Optional[BGMRecommendation]: """Get BGM recommendation for common content types.""" preset = BGM_PRESETS.get(content_type.lower()) if not preset: return None mood = preset["mood"] mood_info = MOOD_BGM_MAPPING.get(mood, MOOD_BGM_MAPPING["upbeat"]) return BGMRecommendation( mood=mood, energy=mood_info["energy"], suggested_genres=mood_info["genres"], search_keywords=preset["keywords"], reasoning=f"Preset for {content_type} content", )