Files
bini-shorts-maker/backend/app/services/bgm_provider.py
kihong.kim be3ed688a1 feat(bgm): 카테고리당 3개 다운로드 및 카테고리별 그룹화
- 카테고리당 BGM 3개씩 다운로드 기능 추가
- 파일명에 카테고리 prefix 추가 (예: upbeat_trackname.mp3)
- 중복 BGM 자동 스킵 기능
- 프론트엔드에서 BGM을 카테고리별로 그룹화하여 표시
- 분류되지 않은 BGM은 '기타' 섹션에 표시

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-04 21:36:30 +09:00

549 lines
18 KiB
Python

"""
BGM Provider Service - Freesound & Pixabay Integration
Freesound API: https://freesound.org/docs/api/
- 500,000+ Creative Commons licensed sounds
- Free API with generous rate limits
- Various licenses (CC0, CC-BY, CC-BY-NC, etc.)
Pixabay: Manual download recommended (no public Music API)
"""
import os
import httpx
import aiofiles
from typing import Optional, List, Tuple
from pydantic import BaseModel
from app.config import settings
class FreesoundTrack(BaseModel):
    """Data model describing one track on Freesound."""

    id: int
    name: str
    duration: float  # track length, in seconds
    tags: List[str]
    license: str
    username: str
    preview_url: str  # high-quality preview (128 kbps MP3)
    download_url: str  # original file; fetching it requires auth
    description: str = ""
class BGMSearchResult(BaseModel):
    """Data model for a single entry in a BGM search result list."""

    id: str
    title: str
    duration: int
    tags: List[str]
    preview_url: str
    # The remaining fields are optional and default to empty values.
    download_url: str = ""
    license: str = ""
    source: str = "freesound"  # label of the provider the entry came from
# Freesound license names that permit commercial use.
# CC0 and CC-BY qualify. CC-BY-NC ("Attribution Noncommercial") does NOT allow
# commercial use and is therefore deliberately absent from this list.
COMMERCIAL_LICENSES = [
    "Creative Commons 0",  # CC0 - Public Domain
    "Attribution",  # CC-BY - Attribution required
]

# License filter string for commercial-only search (matches the names above)
COMMERCIAL_LICENSE_FILTER = 'license:"Creative Commons 0" OR license:"Attribution"'
async def search_freesound(
    query: str,
    min_duration: int = 10,
    max_duration: int = 180,  # Shorts typically < 60s, allow some buffer
    page: int = 1,
    page_size: int = 15,
    filter_music: bool = True,
    commercial_only: bool = True,  # Default: only commercially usable
) -> Tuple[bool, str, List[BGMSearchResult]]:
    """
    Search for sounds with the Freesound text-search API.

    Args:
        query: Search keywords (e.g., "upbeat music", "chill background")
        min_duration: Minimum duration in seconds
        max_duration: Maximum duration in seconds
        page: Page number (1-indexed)
        page_size: Results per page (clamped to the range 1..150)
        filter_music: Append "music" to the query (unless it already contains
            that word) for better BGM results
        commercial_only: Restrict results to CC0 ("Creative Commons 0")
            sounds. CC-BY is commercially usable as well, but the filter sent
            here does not include it.

    Returns:
        Tuple of (success, message, results). On any failure ``success`` is
        False, ``message`` describes the problem and ``results`` is empty.
    """
    api_key = settings.FREESOUND_API_KEY
    if not api_key:
        return False, "Freesound API key not configured. Get one at https://freesound.org/apiv2/apply", []

    # Add "music" to the query for better BGM results
    search_query = f"{query} music" if filter_music and "music" not in query.lower() else query

    # Build the filter string for duration and license
    filter_parts = [f"duration:[{min_duration} TO {max_duration}]"]
    if commercial_only:
        # Only CC0 is requested; CC-BY-NC (Noncommercial) must never be
        # included. NOTE(review): CC-BY is also commercial-OK but requires
        # attribution - confirm whether excluding it is intentional.
        filter_parts.append('license:"Creative Commons 0"')
    filter_str = " ".join(filter_parts)

    params = {
        "token": api_key,
        "query": search_query,
        "filter": filter_str,
        "page": page,
        "page_size": max(1, min(page_size, 150)),
        "fields": "id,name,duration,tags,license,username,previews,description",
        "sort": "score",  # relevance
    }

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                "https://freesound.org/apiv2/search/text/",
                params=params,
                timeout=30,
            )

        if response.status_code == 401:
            return False, "Invalid Freesound API key", []
        if response.status_code != 200:
            return False, f"Freesound API error: HTTP {response.status_code}", []

        data = response.json()
        results = []
        for sound in data.get("results") or []:
            # `or` fallbacks keep a JSON null in one field from failing the
            # whole search during model validation.
            previews = sound.get("previews") or {}
            # Prefer the high-quality preview
            preview_url = (
                previews.get("preview-hq-mp3")
                or previews.get("preview-lq-mp3")
                or ""
            )
            # Parse license for display
            license_name = _parse_freesound_license(sound.get("license") or "")

            results.append(BGMSearchResult(
                id=str(sound["id"]),
                title=sound.get("name") or "Unknown",
                duration=int(sound.get("duration") or 0),
                tags=(sound.get("tags") or [])[:10],  # Limit tags
                preview_url=preview_url,
                download_url=f"https://freesound.org/apiv2/sounds/{sound['id']}/download/",
                license=license_name,
                source="freesound",
            ))

        total = data.get("count", 0)
        license_info = " (commercial use OK)" if commercial_only else ""
        message = f"Found {total} sounds on Freesound{license_info}"
        return True, message, results
    except httpx.TimeoutException:
        return False, "Freesound API timeout", []
    except Exception as e:
        # Boundary handler: callers expect a (success, message, results)
        # tuple rather than an exception.
        return False, f"Freesound search error: {str(e)}", []
def _parse_freesound_license(license_url: str) -> str:
    """
    Parse a Freesound license URL into a human-readable name.

    Matching is case-insensitive. A missing license (``None`` or an empty
    string) or an unrecognized URL yields "See License".

    Args:
        license_url: License URL as reported for a sound

    Returns:
        Display name of the license
    """
    url = (license_url or "").lower()
    if "zero" in url or "cc0" in url:
        return "CC0 (Public Domain)"
    # "by-nc" is tested before "by-sa" so that by-nc-sa is reported as
    # non-commercial.
    if "by-nc" in url:
        return "CC BY-NC (Non-Commercial)"
    if "by-sa" in url:
        return "CC BY-SA (Share Alike)"
    if "by/" in url:
        return "CC BY (Attribution)"
    if "sampling+" in url:
        return "Sampling+"
    return "See License"
async def download_freesound(
    sound_id: str,
    output_dir: str,
    filename: str,
) -> Tuple[bool, str, Optional[str]]:
    """
    Fetch a Freesound sound and store it as an MP3 file.

    Freesound only serves original files via OAuth, so this function saves
    the preview instead: the HQ preview (128kbps MP3) when present, otherwise
    the LQ preview.

    Args:
        sound_id: Freesound sound ID
        output_dir: Directory to save file (created when missing)
        filename: Output filename (without extension)

    Returns:
        Tuple of (success, message, file_path); file_path is None on failure
    """
    token = settings.FREESOUND_API_KEY
    if not token:
        return False, "Freesound API key not configured", None

    try:
        async with httpx.AsyncClient() as client:
            # Step 1: look up the sound's metadata to learn its preview URLs
            info_response = await client.get(
                f"https://freesound.org/apiv2/sounds/{sound_id}/",
                params={
                    "token": token,
                    "fields": "id,name,previews,license,username",
                },
                timeout=30,
            )
            if info_response.status_code != 200:
                return False, f"Failed to get sound info: HTTP {info_response.status_code}", None

            sound_data = info_response.json()
            previews = sound_data.get("previews", {})

            # Step 2: pick the best available preview
            preview_url = previews.get("preview-hq-mp3") or previews.get("preview-lq-mp3")
            if not preview_url:
                return False, "No preview URL available", None

            # Step 3: fetch the audio bytes
            audio_response = await client.get(preview_url, timeout=60, follow_redirects=True)
            if audio_response.status_code != 200:
                return False, f"Download failed: HTTP {audio_response.status_code}", None

            # Step 4: write them to disk
            os.makedirs(output_dir, exist_ok=True)
            file_path = os.path.join(output_dir, f"{filename}.mp3")
            async with aiofiles.open(file_path, "wb") as out_file:
                await out_file.write(audio_response.content)

            # Attribution details for the status message
            author = sound_data.get("username", "Unknown")
            license_label = _parse_freesound_license(sound_data.get("license", ""))
            return True, f"Downloaded from Freesound (by {author}, {license_label})", file_path
    except httpx.TimeoutException:
        return False, "Download timeout", None
    except Exception as exc:
        return False, f"Download error: {str(exc)}", None
def get_existing_bgm_ids(output_dir: str) -> set:
    """
    Collect the lowercased base names of audio files in a directory.

    Files count as audio when their extension is .mp3, .wav, .m4a or .ogg,
    compared case-insensitively so that e.g. "Track.MP3" is recognized too.

    Args:
        output_dir: Directory to scan

    Returns:
        Set of lowercased filenames without extension; empty when
        ``output_dir`` does not exist or is not a directory
    """
    audio_extensions = (".mp3", ".wav", ".m4a", ".ogg")
    # isdir (rather than exists) so a path pointing at a regular file does
    # not make os.listdir raise.
    if not os.path.isdir(output_dir):
        return set()
    return {
        os.path.splitext(entry)[0].lower()
        for entry in os.listdir(output_dir)
        if entry.lower().endswith(audio_extensions)
    }
async def search_and_download_bgm(
    keywords: List[str],
    output_dir: str,
    max_duration: int = 120,
    commercial_only: bool = True,
    count: int = 1,
    category: Optional[str] = None,
) -> Tuple[bool, str, Optional[str], Optional[BGMSearchResult]]:
    """
    Search Freesound for BGM and download the best matches.

    The first three keywords are searched together; when that yields nothing,
    each of them is searched on its own until one query returns results.
    Results whose generated filename already exists in ``output_dir`` are
    skipped.

    Args:
        keywords: Search keywords from BGM recommendation
        output_dir: Directory to save downloaded files
        max_duration: Maximum duration in seconds
        commercial_only: Only search commercially usable licenses (CC0)
        count: Number of BGMs to download (default: 1)
        category: Category name to prefix filename (e.g., 'upbeat', 'chill')

    Returns:
        Tuple of (success, message, file_path, matched_result).
        When count > 1, file_path and matched_result refer to the last
        downloaded file. When every candidate already exists, success is
        True and both are None.
    """
    if not settings.FREESOUND_API_KEY:
        return False, "Freesound API key not configured", None, None

    # Get existing BGM files to skip duplicates
    existing_bgm = get_existing_bgm_ids(output_dir)

    # Combined query first, then each keyword individually. A keyword equal
    # to the combined query is dropped so the same search is not sent twice.
    combined_query = " ".join(keywords[:3])
    queries = [combined_query] + [kw for kw in keywords[:3] if kw != combined_query]

    success, message, results = False, "", []
    for query in queries:
        success, message, results = await search_freesound(
            query=query,
            min_duration=15,
            max_duration=max_duration,
            page_size=max(count * 3, 15),  # Get more results to filter duplicates
            commercial_only=commercial_only,
        )
        if success and results:
            break

    if not results:
        # Report the real search error (bad API key, timeout, HTTP error)
        # rather than claiming that nothing matched.
        if not success and message:
            return False, message, None, None
        return False, "No matching BGM found on Freesound", None, None

    # Download multiple BGMs
    downloaded_count = 0
    skipped_count = 0
    last_file_path = None
    last_match = None
    failures = []

    for result in results:
        if downloaded_count >= count:
            break

        # Generate safe filename with category prefix
        base_name = result.title.lower().replace(" ", "_")[:50]
        base_name = "".join(c for c in base_name if c.isalnum() or c == "_")
        if not base_name.strip("_"):
            # Title had no usable characters; fall back to the sound ID so
            # the file does not end up with an empty or underscore-only name.
            base_name = f"freesound_{result.id}"
        safe_filename = f"{category}_{base_name}" if category else base_name

        # Skip if already exists
        if safe_filename.lower() in existing_bgm:
            skipped_count += 1
            continue

        # Download it
        dl_success, download_msg, file_path = await download_freesound(
            sound_id=result.id,
            output_dir=output_dir,
            filename=safe_filename,
        )
        if dl_success:
            downloaded_count += 1
            last_file_path = file_path
            last_match = result
            existing_bgm.add(safe_filename.lower())  # Add to existing set
        else:
            failures.append(f"Failed: {result.title} - {download_msg}")

    if downloaded_count == 0:
        if skipped_count > 0:
            return True, f"All {skipped_count} BGMs already exist, skipped", None, None
        failure_message = "Failed to download any BGM"
        if failures:
            # Surface the per-track reasons instead of discarding them
            failure_message += ": " + "; ".join(failures)
        return False, failure_message, None, None

    summary = f"Downloaded {downloaded_count} BGM(s)"
    if skipped_count > 0:
        summary += f", skipped {skipped_count} existing"
    if failures:
        summary += f", {len(failures)} failed"
    return True, summary, last_file_path, last_match
async def search_pixabay_music(
    query: str = "",
    category: str = "",
    min_duration: int = 0,
    max_duration: int = 120,
    page: int = 1,
    per_page: int = 20,
) -> Tuple[bool, str, List[BGMSearchResult]]:
    """
    Look up royalty-free music for Pixabay-style searches.

    Pixabay has no publicly available Music API, so the curated
    recommendation list is returned instead. Only ``query`` is forwarded;
    the other parameters are accepted but currently unused.

    Returns:
        Tuple of (success, message, results)
    """
    curated = await _get_curated_bgm_list(query)
    return curated
async def _get_curated_bgm_list(query: str = "") -> Tuple[bool, str, List[BGMSearchResult]]:
    """
    Build the curated list of recommended BGM categories.

    The entries are category suggestions rather than actual audio files, so
    each result carries an empty preview URL. A non-empty ``query`` narrows
    the list to entries whose title or tags contain it (case-insensitive on
    the query and title); when nothing matches, the full list is returned.

    Returns:
        Tuple of (True, "Curated BGM list", results)
    """
    catalogue = [
        {
            "id": "upbeat_energetic",
            "title": "Upbeat & Energetic",
            "duration": 60,
            "tags": ["upbeat", "energetic", "happy", "positive"],
            "description": "활기찬 쇼츠에 적합",
        },
        {
            "id": "chill_lofi",
            "title": "Chill Lo-Fi",
            "duration": 60,
            "tags": ["chill", "lofi", "relaxing", "calm"],
            "description": "편안한 분위기의 콘텐츠",
        },
        {
            "id": "epic_cinematic",
            "title": "Epic & Cinematic",
            "duration": 60,
            "tags": ["epic", "cinematic", "dramatic", "intense"],
            "description": "드라마틱한 순간",
        },
        {
            "id": "funny_quirky",
            "title": "Funny & Quirky",
            "duration": 30,
            "tags": ["funny", "quirky", "comedy", "playful"],
            "description": "유머러스한 콘텐츠",
        },
        {
            "id": "corporate_tech",
            "title": "Corporate & Tech",
            "duration": 60,
            "tags": ["corporate", "tech", "modern", "professional"],
            "description": "정보성 콘텐츠",
        },
    ]

    selected = catalogue
    if query:
        needle = query.lower()

        def _matches(entry: dict) -> bool:
            # An entry matches on its title or on any one of its tags.
            if needle in entry["title"].lower():
                return True
            return any(needle in tag for tag in entry["tags"])

        narrowed = [entry for entry in catalogue if _matches(entry)]
        # An empty match set falls back to the complete catalogue.
        if narrowed:
            selected = narrowed

    results = []
    for entry in selected:
        results.append(
            BGMSearchResult(
                id=entry["id"],
                title=entry["title"],
                duration=entry["duration"],
                tags=entry["tags"],
                preview_url="",  # placeholder: no actual audio URL for curated entries
                source="curated",
            )
        )
    return True, "Curated BGM list", results
async def download_from_url(
    url: str,
    output_path: str,
    filename: str,
) -> Tuple[bool, str, Optional[str]]:
    """
    Download an audio file from a URL.

    The file extension is derived from the response's Content-Type header
    (matched case-insensitively); unrecognized types default to ".mp3".

    Args:
        url: Audio file URL
        output_path: Directory to save file (created when missing)
        filename: Output filename (without extension)

    Returns:
        Tuple of (success, message, file_path); file_path is None on failure
    """
    # Content-Type fragment -> extension, checked in order.
    extension_markers = (
        ("mpeg", ".mp3"),
        ("wav", ".wav"),
        ("ogg", ".ogg"),
        ("mp4", ".m4a"),  # audio/mp4
        ("m4a", ".m4a"),  # audio/x-m4a
    )
    try:
        os.makedirs(output_path, exist_ok=True)
        async with httpx.AsyncClient() as client:
            response = await client.get(url, timeout=60, follow_redirects=True)
            if response.status_code != 200:
                return False, f"Download failed: HTTP {response.status_code}", None

            # Determine file extension from content-type
            content_type = response.headers.get("content-type", "").lower()
            ext = ".mp3"  # Default to mp3
            for marker, candidate in extension_markers:
                if marker in content_type:
                    ext = candidate
                    break

            file_path = os.path.join(output_path, f"{filename}{ext}")
            # Write asynchronously so the event loop is not blocked by disk
            # I/O (the Freesound downloader writes the same way).
            async with aiofiles.open(file_path, "wb") as f:
                await f.write(response.content)
            return True, "Download complete", file_path
    except Exception as e:
        # Boundary handler: callers expect a (success, message, path) tuple.
        return False, f"Download error: {str(e)}", None
# Catalogue of popular free BGM sites, keyed by a short source identifier.
# Every entry carries name/url/license/description/api_available; only
# sources with an API additionally provide "api_url".
FREE_BGM_SOURCES = {
    # The one source in this catalogue flagged as having an API
    "freesound": {
        "name": "Freesound",
        "url": "https://freesound.org/",
        "license": "CC0/CC-BY/CC-BY-NC (Various)",
        "description": "500,000+ CC licensed sounds, API available",
        "api_available": True,
        "api_url": "https://freesound.org/apiv2/apply",
    },
    # The remaining sources are flagged as having no API
    "pixabay": {
        "name": "Pixabay Music",
        "url": "https://pixabay.com/music/",
        "license": "Pixabay License (Free for commercial use)",
        "description": "Large collection of royalty-free music",
        "api_available": False,
    },
    "mixkit": {
        "name": "Mixkit",
        "url": "https://mixkit.co/free-stock-music/",
        "license": "Mixkit License (Free for commercial use)",
        "description": "High-quality free music tracks",
        "api_available": False,
    },
    "uppbeat": {
        "name": "Uppbeat",
        "url": "https://uppbeat.io/",
        "license": "Free tier: 10 tracks/month",
        "description": "YouTube-friendly music",
        "api_available": False,
    },
    "youtube_audio_library": {
        "name": "YouTube Audio Library",
        "url": "https://studio.youtube.com/channel/UC/music",
        "license": "Free for YouTube videos",
        "description": "Google's free music library",
        "api_available": False,
    },
}
def get_free_bgm_sources() -> dict:
    """
    Return the catalogue of recommended free BGM sources.

    The returned mapping is the module-level ``FREE_BGM_SOURCES`` object
    itself, not a copy.
    """
    sources = FREE_BGM_SOURCES
    return sources