# bini-shorts-maker/backend/app/services/bgm_provider.py

"""
BGM Provider Service - Freesound & Pixabay Integration

Freesound API: https://freesound.org/docs/api/
- 500,000+ Creative Commons licensed sounds
- Free API with generous rate limits
- Various licenses (CC0, CC-BY, CC-BY-NC, etc.)

Pixabay: Manual download recommended (no public Music API)
"""

import os
import httpx
import aiofiles
from typing import Optional, List, Tuple
from pydantic import BaseModel
from app.config import settings


class FreesoundTrack(BaseModel):
    """Pydantic model describing a single Freesound sound.

    Every field except ``description`` is required.

    NOTE(review): nothing in the visible part of this module constructs this
    model (search hits are returned as ``BGMSearchResult``) - confirm whether
    it is still used elsewhere.
    """
    id: int  # presumably the numeric sound ID assigned by Freesound - TODO confirm
    name: str
    duration: float  # seconds
    tags: List[str]
    license: str  # presumably the license value as reported by Freesound - TODO confirm
    username: str  # presumably the uploader's account name - TODO confirm
    preview_url: str  # HQ preview (128kbps mp3)
    download_url: str  # Original file (requires auth)
    description: str = ""  # optional; defaults to an empty string


class BGMSearchResult(BaseModel):
    """Source-agnostic BGM search hit returned by this module's search functions.

    Instances are built from Freesound API responses by ``search_freesound``
    and from the built-in suggestion list by ``_get_curated_bgm_list``.
    """
    id: str  # Freesound sound ID as a string, or a slug such as "chill_lofi" for curated entries
    title: str
    duration: int  # whole seconds (Freesound's duration is truncated with int())
    tags: List[str]
    preview_url: str  # preview MP3 URL; empty string when none is available (always empty for curated entries)
    download_url: str = ""  # Freesound download endpoint; left empty by the curated list
    license: str = ""  # human-readable label such as "CC0 (Public Domain)"; left empty by the curated list
    source: str = "freesound"  # "freesound" or "curated" within this module


# Freesound license names that permit commercial use.
# CC0 and CC-BY are commercially usable. CC-BY-NC ("Attribution Noncommercial")
# is NOT, so it is deliberately absent: a membership test against this list
# must never accept a non-commercial license.
COMMERCIAL_LICENSES = [
    "Creative Commons 0",           # CC0 - Public Domain
    "Attribution",                  # CC-BY - Attribution required
]

# License filter string for commercial-only search (CC0 or CC-BY).
# NOTE(review): this contains a bare OR; if it is ever joined with other
# space-separated filter terms it probably needs wrapping in parentheses -
# confirm against the Freesound filter syntax before combining.
COMMERCIAL_LICENSE_FILTER = 'license:"Creative Commons 0" OR license:"Attribution"'


async def search_freesound(
    query: str,
    min_duration: int = 10,
    max_duration: int = 180,  # Shorts typically < 60s, allow some buffer
    page: int = 1,
    page_size: int = 15,
    filter_music: bool = True,
    commercial_only: bool = True,  # Default: only commercially usable
) -> Tuple[bool, str, List[BGMSearchResult]]:
    """
    Search for sounds via the Freesound text-search endpoint.

    Errors are never raised to the caller; they are reported through the
    returned ``(success, message, results)`` tuple.

    Args:
        query: Search keywords (e.g., "upbeat music", "chill background")
        min_duration: Minimum duration in seconds
        max_duration: Maximum duration in seconds
        page: Page number (1-indexed); values below 1 are treated as 1
        page_size: Results per page; clamped to the range 1..150
        filter_music: Append "music" to the query (unless it already contains
            the word) for better BGM results
        commercial_only: Restrict results to CC0 ("Creative Commons 0")
            sounds. Note that the filter sent here is CC0-only; CC-BY sounds
            are not included even though they are also commercially usable.

    Returns:
        Tuple of (success, message, results). ``results`` is an empty list
        whenever ``success`` is False.
    """
    api_key = settings.FREESOUND_API_KEY
    if not api_key:
        return False, "Freesound API key not configured. Get one at https://freesound.org/apiv2/apply", []

    # Add "music" to the query for better BGM results
    search_query = f"{query} music" if filter_music and "music" not in query.lower() else query

    # Build the space-separated filter string: duration range, plus license
    filter_parts = [f"duration:[{min_duration} TO {max_duration}]"]

    if commercial_only:
        # Only CC0 is requested; CC-BY-NC (Noncommercial) and CC-BY are excluded.
        filter_parts.append('license:"Creative Commons 0"')

    params = {
        "token": api_key,
        "query": search_query,
        "filter": " ".join(filter_parts),
        # Guard both bounds so a zero/negative value cannot produce an
        # avoidable API error.
        "page": max(1, page),
        "page_size": max(1, min(page_size, 150)),
        "fields": "id,name,duration,tags,license,username,previews,description",
        "sort": "score",  # relevance
    }

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                "https://freesound.org/apiv2/search/text/",
                params=params,
                timeout=30,
            )

            if response.status_code == 401:
                return False, "Invalid Freesound API key", []

            if response.status_code != 200:
                return False, f"Freesound API error: HTTP {response.status_code}", []

            data = response.json()
            results = []

            for sound in data.get("results") or []:
                # `or` fallbacks tolerate explicit nulls in the payload so a
                # single incomplete record does not abort the whole search.
                previews = sound.get("previews") or {}
                # Prefer the high-quality preview, then the low-quality one
                preview_url = (
                    previews.get("preview-hq-mp3") or
                    previews.get("preview-lq-mp3") or
                    ""
                )

                # Convert the license value into a display label
                license_name = _parse_freesound_license(sound.get("license") or "")

                results.append(BGMSearchResult(
                    id=str(sound["id"]),
                    title=sound.get("name") or "Unknown",
                    duration=int(sound.get("duration") or 0),
                    tags=(sound.get("tags") or [])[:10],  # Limit tags
                    preview_url=preview_url,
                    download_url=f"https://freesound.org/apiv2/sounds/{sound['id']}/download/",
                    license=license_name,
                    source="freesound",
                ))

            # "count" is the total reported by the API, not len(results)
            total = data.get("count", 0)
            license_info = " (commercial use OK)" if commercial_only else ""
            message = f"Found {total} sounds on Freesound{license_info}"

            return True, message, results

    except httpx.TimeoutException:
        return False, "Freesound API timeout", []
    except Exception as e:
        # Boundary handler: callers rely on the tuple contract, not exceptions.
        return False, f"Freesound search error: {str(e)}", []


def _parse_freesound_license(license_url: str) -> str:
    """Parse a Freesound license URL into a human-readable name.

    Matching is case-insensitive and based on substrings of the URL. A
    missing value (``None`` or an empty string) and any unrecognised URL
    both yield "See License".

    Args:
        license_url: License URL as found in a Freesound API response.

    Returns:
        A short display label such as "CC0 (Public Domain)".
    """
    # Normalise once so every check below is case-insensitive and None-safe.
    url = (license_url or "").lower()

    if "zero" in url or "cc0" in url:
        return "CC0 (Public Domain)"
    # "by-nc" is tested before "by-sa" so by-nc-sa is reported as non-commercial.
    if "by-nc" in url:
        return "CC BY-NC (Non-Commercial)"
    if "by-sa" in url:
        return "CC BY-SA (Share Alike)"
    if "by/" in url:
        return "CC BY (Attribution)"
    if "sampling+" in url:
        return "Sampling+"
    return "See License"


async def download_freesound(
    sound_id: str,
    output_dir: str,
    filename: str,
) -> Tuple[bool, str, Optional[str]]:
    """
    Fetch a Freesound sound's preview MP3 and store it on disk.

    The original upload is not downloaded (Freesound requires OAuth for
    that). Instead the sound's metadata is looked up with the API token and
    the HQ preview (128kbps MP3) is saved, falling back to the LQ preview.

    Args:
        sound_id: Freesound sound ID
        output_dir: Directory to save the file in (created if missing)
        filename: Output filename without extension; ".mp3" is appended

    Returns:
        Tuple of (success, message, file_path). ``file_path`` is None on
        failure; on success the message carries the uploader and license.
    """
    token = settings.FREESOUND_API_KEY
    if not token:
        return False, "Freesound API key not configured", None

    try:
        async with httpx.AsyncClient() as http:
            # Step 1: look up the sound's metadata to find its preview URLs
            info = await http.get(
                f"https://freesound.org/apiv2/sounds/{sound_id}/",
                params={
                    "token": token,
                    "fields": "id,name,previews,license,username",
                },
                timeout=30,
            )
            if info.status_code != 200:
                return False, f"Failed to get sound info: HTTP {info.status_code}", None

            metadata = info.json()
            preview_map = metadata.get("previews", {})

            # Step 2: pick the best available preview (HQ first, then LQ)
            source_url = preview_map.get("preview-hq-mp3") or preview_map.get("preview-lq-mp3")
            if not source_url:
                return False, "No preview URL available", None

            # Step 3: fetch the audio bytes
            audio = await http.get(source_url, timeout=60, follow_redirects=True)
            if audio.status_code != 200:
                return False, f"Download failed: HTTP {audio.status_code}", None

            # Step 4: write the bytes to <output_dir>/<filename>.mp3
            os.makedirs(output_dir, exist_ok=True)
            target = os.path.join(output_dir, f"{filename}.mp3")
            async with aiofiles.open(target, 'wb') as out:
                await out.write(audio.content)

            # Step 5: report attribution details in the success message
            uploader = metadata.get("username", "Unknown")
            license_label = _parse_freesound_license(metadata.get("license", ""))
            return True, f"Downloaded from Freesound (by {uploader}, {license_label})", target

    except httpx.TimeoutException:
        return False, "Download timeout", None
    except Exception as e:
        return False, f"Download error: {str(e)}", None


async def search_and_download_bgm(
    keywords: List[str],
    output_dir: str,
    max_duration: int = 120,
    commercial_only: bool = True,
) -> Tuple[bool, str, Optional[str], Optional[BGMSearchResult]]:
    """
    Search for BGM and download the best match.

    The first three keywords are searched as one combined query; if that
    yields nothing (or fails), each of those keywords is tried on its own.
    The first hit of the first successful search is downloaded.

    Args:
        keywords: Search keywords from BGM recommendation
        output_dir: Directory to save downloaded file
        max_duration: Maximum duration in seconds
        commercial_only: Passed through to ``search_freesound``; restricts
            the search to commercially usable licenses (CC0)

    Returns:
        Tuple of (success, message, file_path, matched_result). When no
        search produced a hit, the message is the error of the last search
        attempt if that attempt failed, otherwise a "no match" notice. When
        the download fails, ``matched_result`` is still returned so callers
        can see what was selected.
    """
    if not settings.FREESOUND_API_KEY:
        return False, "Freesound API key not configured", None, None

    # Combined query first (wider page), then each keyword individually.
    top_keywords = keywords[:3]
    attempts = [(" ".join(top_keywords), 10)]
    attempts.extend((keyword, 5) for keyword in top_keywords)

    results: List[BGMSearchResult] = []
    search_error: Optional[str] = None
    for query, page_size in attempts:
        success, message, found = await search_freesound(
            query=query,
            min_duration=15,
            max_duration=max_duration,
            page_size=page_size,
            commercial_only=commercial_only,
        )
        if success and found:
            results = found
            break
        # Remember why the latest attempt failed so a real API problem
        # (bad key, timeout, HTTP error) is not reported as "no match".
        search_error = None if success else message

    if not results:
        return False, search_error or "No matching BGM found on Freesound", None, None

    # Select the best result (first one, sorted by relevance)
    best_match = results[0]

    # Derive a filesystem-friendly name from the title
    safe_filename = best_match.title.lower().replace(" ", "_")[:50]
    safe_filename = "".join(c for c in safe_filename if c.isalnum() or c == "_")
    if not safe_filename.strip("_"):
        # Title had no usable characters; avoid writing a bare ".mp3" file.
        safe_filename = f"freesound_{best_match.id}"

    success, download_msg, file_path = await download_freesound(
        sound_id=best_match.id,
        output_dir=output_dir,
        filename=safe_filename,
    )

    if not success:
        return False, download_msg, None, best_match

    return True, download_msg, file_path, best_match


async def search_pixabay_music(
    query: str = "",
    category: str = "",
    min_duration: int = 0,
    max_duration: int = 120,
    page: int = 1,
    per_page: int = 20,
) -> Tuple[bool, str, List[BGMSearchResult]]:
    """
    Stand-in for a Pixabay music search.

    Pixabay has no publicly available Music API, so no request is made.
    The call is delegated to the built-in curated suggestion list instead.

    Only ``query`` is forwarded; ``category``, ``min_duration``,
    ``max_duration``, ``page`` and ``per_page`` are accepted but currently
    have no effect on the result.

    Returns:
        Tuple of (success, message, results) from the curated list.
    """
    curated = await _get_curated_bgm_list(query)
    return curated


async def _get_curated_bgm_list(query: str = "") -> Tuple[bool, str, List[BGMSearchResult]]:
    """
    Build the curated list of BGM style suggestions.

    The entries are mood/genre categories rather than real audio files, so
    every result has an empty ``preview_url`` and ``source="curated"``.

    Args:
        query: Optional search text. An entry matches when the lower-cased
            query is a substring of its title or of any of its tags. If
            nothing matches, the complete list is returned.

    Returns:
        Tuple of (True, "Curated BGM list", results); this never fails.
    """
    # Category suggestions; "description" values are Korean usage hints and
    # are not copied into the returned results.
    catalog = [
        {
            "id": "upbeat_energetic",
            "title": "Upbeat & Energetic",
            "duration": 60,
            "tags": ["upbeat", "energetic", "happy", "positive"],
            "description": "활기찬 쇼츠에 적합",  # "Suitable for lively shorts"
        },
        {
            "id": "chill_lofi",
            "title": "Chill Lo-Fi",
            "duration": 60,
            "tags": ["chill", "lofi", "relaxing", "calm"],
            "description": "편안한 분위기의 콘텐츠",  # "Content with a relaxed mood"
        },
        {
            "id": "epic_cinematic",
            "title": "Epic & Cinematic",
            "duration": 60,
            "tags": ["epic", "cinematic", "dramatic", "intense"],
            "description": "드라마틱한 순간",  # "Dramatic moments"
        },
        {
            "id": "funny_quirky",
            "title": "Funny & Quirky",
            "duration": 30,
            "tags": ["funny", "quirky", "comedy", "playful"],
            "description": "유머러스한 콘텐츠",  # "Humorous content"
        },
        {
            "id": "corporate_tech",
            "title": "Corporate & Tech",
            "duration": 60,
            "tags": ["corporate", "tech", "modern", "professional"],
            "description": "정보성 콘텐츠",  # "Informational content"
        },
    ]

    def _matches(entry: dict, needle: str) -> bool:
        """Return True when needle occurs in the entry's title or a tag."""
        if needle in entry["title"].lower():
            return True
        for tag in entry["tags"]:
            if needle in tag:
                return True
        return False

    selected = catalog
    if query:
        needle = query.lower()
        hits = [entry for entry in catalog if _matches(entry, needle)]
        # An unmatched query falls back to showing every suggestion.
        if hits:
            selected = hits

    results = []
    for entry in selected:
        results.append(
            BGMSearchResult(
                id=entry["id"],
                title=entry["title"],
                duration=entry["duration"],
                tags=entry["tags"],
                preview_url="",  # no actual audio behind a suggestion
                source="curated",
            )
        )

    return True, "Curated BGM list", results


async def download_from_url(
    url: str,
    output_path: str,
    filename: str,
) -> Tuple[bool, str, Optional[str]]:
    """
    Download an audio file from a URL and save it to disk.

    The file extension is chosen from the response's Content-Type header
    ("mpeg" -> .mp3, "wav" -> .wav, "ogg" -> .ogg), defaulting to .mp3.
    Errors are never raised; they are reported through the returned tuple.

    Args:
        url: Audio file URL
        output_path: Directory to save file (created if missing)
        filename: Output filename (without extension)

    Returns:
        Tuple of (success, message, file_path); ``file_path`` is None on
        failure.
    """
    # Checked in order; the first marker found in the Content-Type wins.
    extension_by_marker = (
        ("mpeg", ".mp3"),
        ("wav", ".wav"),
        ("ogg", ".ogg"),
    )

    try:
        os.makedirs(output_path, exist_ok=True)

        async with httpx.AsyncClient() as client:
            response = await client.get(url, timeout=60, follow_redirects=True)

            if response.status_code != 200:
                return False, f"Download failed: HTTP {response.status_code}", None

            # Header values are compared case-insensitively.
            content_type = response.headers.get("content-type", "").lower()
            ext = ".mp3"  # Default to mp3
            for marker, candidate in extension_by_marker:
                if marker in content_type:
                    ext = candidate
                    break

            file_path = os.path.join(output_path, f"{filename}{ext}")

            # Use aiofiles (as download_freesound does) so the write does
            # not block the event loop.
            async with aiofiles.open(file_path, "wb") as f:
                await f.write(response.content)

            return True, "Download complete", file_path

    except httpx.TimeoutException:
        # Reported explicitly: timeout exceptions often carry no message text.
        return False, "Download timeout", None
    except Exception as e:
        return False, f"Download error: {str(e)}", None


# Registry of popular free BGM sources, keyed by a short identifier.
# Each entry carries "name", "url", "license", "description" and
# "api_available"; only the "freesound" entry also has "api_url" (the page
# where a Freesound API key is requested). Returned as-is by
# get_free_bgm_sources().
FREE_BGM_SOURCES = {
    "freesound": {
        "name": "Freesound",
        "url": "https://freesound.org/",
        "license": "CC0/CC-BY/CC-BY-NC (Various)",
        "description": "500,000+ CC licensed sounds, API available",
        "api_available": True,
        "api_url": "https://freesound.org/apiv2/apply",
    },
    "pixabay": {
        "name": "Pixabay Music",
        "url": "https://pixabay.com/music/",
        "license": "Pixabay License (Free for commercial use)",
        "description": "Large collection of royalty-free music",
        "api_available": False,
    },
    "mixkit": {
        "name": "Mixkit",
        "url": "https://mixkit.co/free-stock-music/",
        "license": "Mixkit License (Free for commercial use)",
        "description": "High-quality free music tracks",
        "api_available": False,
    },
    "uppbeat": {
        "name": "Uppbeat",
        "url": "https://uppbeat.io/",
        "license": "Free tier: 10 tracks/month",
        "description": "YouTube-friendly music",
        "api_available": False,
    },
    "youtube_audio_library": {
        "name": "YouTube Audio Library",
        # NOTE(review): the channel segment is just "UC" with no channel ID -
        # confirm this URL resolves as intended.
        "url": "https://studio.youtube.com/channel/UC/music",
        "license": "Free for YouTube videos",
        "description": "Google's free music library",
        "api_available": False,
    },
}


def get_free_bgm_sources() -> dict:
    """Return the registry of recommended free BGM sources.

    The module-level ``FREE_BGM_SOURCES`` mapping itself is returned, not a
    copy, so changes made by a caller are visible to every other user of it.
    """
    sources = FREE_BGM_SOURCES
    return sources