Files
bini-shorts-maker/backend/app/routers/process.py
kihong.kim 5c57f33903 feat: 타임라인 에디터 및 비디오 스튜디오 컴포넌트 추가
- TimelineEditor, VideoStudio 컴포넌트 신규 추가
- 백엔드 transcriber, video_processor 서비스 개선
- 프론트엔드 HomePage 리팩토링 및 스타일 업데이트

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-06 21:21:58 +09:00

1076 lines
37 KiB
Python

import os
from fastapi import APIRouter, BackgroundTasks, HTTPException
from app.models.schemas import (
ProcessRequest, ProcessResponse, JobStatus, SubtitleStyle,
TrimRequest, TrimResponse, VideoInfoResponse,
TranscribeRequest, RenderRequest,
)
from app.models.job_store import job_store
from app.services.transcriber import transcribe_video, segments_to_ass
from app.services.translator import translate_segments, TranslationMode
from app.services.video_processor import process_video, trim_video, get_video_info, get_video_duration, extract_frame
from app.services.thumbnail import generate_thumbnail, generate_catchphrase, get_video_timestamps
from app.config import settings
router = APIRouter()
async def process_task(
    job_id: str,
    bgm_id: str | None,
    bgm_volume: float,
    subtitle_style: SubtitleStyle | None,
    keep_original_audio: bool,
    translation_mode: str | None = None,
    use_vocal_separation: bool = False,
):
    """Background task for full video processing pipeline.

    Stages (job progress %):
      1. Audio extraction + speech-to-text via transcribe_video (10-50)
      2. GPT translation, Chinese speech only (55-70)
      3. ASS subtitle file generation (70)
      4. Final render with subtitles/BGM via process_video (75-100)

    All outcomes (including failures) are reported through job_store;
    nothing is returned.
    """
    job = job_store.get_job(job_id)
    if not job or not job.video_path:
        job_store.update_job(job_id, status=JobStatus.FAILED, error="Job or video not found")
        return
    try:
        # Progress callback for real-time status updates
        async def progress_callback(step: str, progress: int):
            step_to_status = {
                "vocal_separation": JobStatus.EXTRACTING_AUDIO,
                "extracting_audio": JobStatus.EXTRACTING_AUDIO,
                "noise_reduction": JobStatus.NOISE_REDUCTION,
                "transcribing": JobStatus.TRANSCRIBING,
            }
            # Unknown step names fall back to TRANSCRIBING.
            status = step_to_status.get(step, JobStatus.TRANSCRIBING)
            print(f"[Progress] Step: {step} -> Status: {status}, Progress: {progress}%")
            job_store.update_job(job_id, status=status, progress=progress)
        # Start with initial status
        job_store.update_job(job_id, status=JobStatus.EXTRACTING_AUDIO, progress=10)
        success, message, segments, detected_lang = await transcribe_video(
            job.video_path,
            use_noise_reduction=True,
            noise_reduction_level="medium",
            use_vocal_separation=use_vocal_separation,
            progress_callback=progress_callback,
        )
        # Handle special cases
        if not success:
            if message in ("NO_AUDIO", "SILENT_AUDIO", "SINGING_ONLY"):
                # Video has no usable speech - allow manual subtitle input or BGM-only processing
                if message == "NO_AUDIO":
                    audio_status = "no_audio_stream"
                elif message == "SILENT_AUDIO":
                    audio_status = "audio_silent"
                else:  # SINGING_ONLY
                    audio_status = "singing_only"
                job_store.update_job(
                    job_id,
                    status=JobStatus.AWAITING_SUBTITLE,
                    progress=35,
                    has_audio=message != "NO_AUDIO",  # Has audio if it's singing
                    audio_status=audio_status,
                )
                return
            else:
                job_store.update_job(job_id, status=JobStatus.FAILED, error=message)
                return
        # Audio OK - speech detected and transcribed
        job_store.update_job(job_id, transcript=segments, progress=50, has_audio=True, audio_status="ok", detected_language=detected_lang)
        # Step 2: Translate/Rewrite (only for Chinese content)
        # Check if the detected language is Chinese (zh, zh-cn, zh-tw, chinese)
        is_chinese = detected_lang and detected_lang.lower() in ['zh', 'zh-cn', 'zh-tw', 'chinese', 'mandarin']
        if is_chinese:
            job_store.update_job(job_id, status=JobStatus.TRANSLATING, progress=55)
            mode = translation_mode or settings.TRANSLATION_MODE
            success, message, segments = await translate_segments(
                segments,
                mode=mode,
                max_tokens=settings.TRANSLATION_MAX_TOKENS,
            )
            if not success:
                # Continue with original text if translation fails
                print(f"Translation warning: {message}")
            job_store.update_job(job_id, transcript=segments, progress=70)
        else:
            # Skip translation for non-Chinese content - just use original text as-is
            print(f"Skipping GPT translation for non-Chinese content (detected: {detected_lang})")
            # Set translated to original text for subtitle generation
            for seg in segments:
                seg.translated = seg.text
            job_store.update_job(job_id, transcript=segments, progress=70)
        # Step 3: Generate subtitle file
        style = subtitle_style or SubtitleStyle()
        subtitle_content = segments_to_ass(
            segments,
            use_translated=True,
            font_size=style.font_size,
            font_color=style.font_color.lstrip("#"),  # ASS color values carry no '#'
            outline_color=style.outline_color.lstrip("#"),
            font_name=style.font_name,
            position=style.position,  # top, center, bottom
            margin_v=style.margin_v,
            outline_width=style.outline_width,
            bold=style.bold,
            shadow=style.shadow,
            background_box=style.background_box,
            background_opacity=style.background_opacity,
            animation=style.animation,
            max_chars_per_line=style.max_chars_per_line,
        )
        # Save subtitle file next to the source video
        job_dir = os.path.dirname(job.video_path)
        subtitle_path = os.path.join(job_dir, "subtitle.ass")
        with open(subtitle_path, "w", encoding="utf-8") as f:
            f.write(subtitle_content)
        # Step 4: Process video
        job_store.update_job(job_id, status=JobStatus.PROCESSING, progress=75)
        # Determine BGM path (.mp3 preferred, .wav as fallback)
        bgm_path = None
        if bgm_id:
            bgm_path = os.path.join(settings.BGM_DIR, f"{bgm_id}.mp3")
            if not os.path.exists(bgm_path):
                bgm_path = os.path.join(settings.BGM_DIR, f"{bgm_id}.wav")
        # Output path
        output_dir = os.path.join(settings.PROCESSED_DIR, job_id)
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "output.mp4")
        success, message = await process_video(
            input_path=job.video_path,
            output_path=output_path,
            subtitle_path=subtitle_path,
            bgm_path=bgm_path,
            bgm_volume=bgm_volume,
            keep_original_audio=keep_original_audio,
        )
        if success:
            job_store.update_job(
                job_id,
                status=JobStatus.COMPLETED,
                output_path=output_path,
                progress=100,
            )
        else:
            job_store.update_job(job_id, status=JobStatus.FAILED, error=message)
    except Exception as e:
        # Any unexpected failure marks the job FAILED with the error text.
        job_store.update_job(job_id, status=JobStatus.FAILED, error=str(e))
@router.post("/", response_model=ProcessResponse)
async def start_processing(
    request: ProcessRequest,
    background_tasks: BackgroundTasks
):
    """Start video processing (transcribe, translate, add subtitles/BGM)."""
    job = job_store.get_job(request.job_id)
    # Guard clauses: the job must exist, be downloaded, and not mid-download.
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")
    if not job.video_path:
        raise HTTPException(status_code=400, detail="Video not downloaded yet")
    if job.status == JobStatus.DOWNLOADING:
        raise HTTPException(status_code=400, detail="Download still in progress")
    # Kick off the full pipeline after the response is sent.
    background_tasks.add_task(
        process_task,
        request.job_id,
        request.bgm_id,
        request.bgm_volume,
        request.subtitle_style,
        request.keep_original_audio,
        request.translation_mode,
        request.use_vocal_separation,
    )
    effective_mode = request.translation_mode or settings.TRANSLATION_MODE
    suffix = ", vocal separation" if request.use_vocal_separation else ""
    return ProcessResponse(
        job_id=request.job_id,
        status=JobStatus.TRANSCRIBING,
        message=f"Processing started (mode: {effective_mode}){suffix}",
    )
@router.post("/{job_id}/transcribe")
async def transcribe_only(job_id: str, background_tasks: BackgroundTasks):
    """Transcribe video only (without full processing).

    Schedules a background transcription; on success the job returns to
    PENDING with its transcript (and detected language) stored.
    """
    job = job_store.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")
    if not job.video_path:
        raise HTTPException(status_code=400, detail="Video not downloaded yet")

    async def transcribe_task():
        job_store.update_job(job_id, status=JobStatus.TRANSCRIBING, progress=35)
        # BUG FIX: transcribe_video returns a 4-tuple
        # (success, message, segments, detected_language) — see its other
        # call sites in this module. The original 3-name unpack raised
        # ValueError on every invocation.
        success, message, segments, detected_lang = await transcribe_video(job.video_path)
        if success:
            job_store.update_job(
                job_id,
                transcript=segments,
                status=JobStatus.PENDING,
                progress=50,
                detected_language=detected_lang,
            )
        else:
            job_store.update_job(job_id, status=JobStatus.FAILED, error=message)

    background_tasks.add_task(transcribe_task)
    return {"message": "Transcription started"}
@router.put("/{job_id}/transcript")
async def update_transcript(job_id: str, segments: list[dict]):
    """Update transcript segments (for manual editing)."""
    job = job_store.get_job(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")
    from app.models.schemas import TranscriptSegment

    def as_segment(raw):
        # Accept plain dicts as well as Pydantic model-like objects.
        payload = raw.model_dump() if hasattr(raw, 'model_dump') else raw
        return TranscriptSegment(**payload)

    job_store.update_job(job_id, transcript=[as_segment(item) for item in segments])
    return {"message": "Transcript updated"}
async def continue_processing_task(
    job_id: str,
    bgm_id: str | None,
    bgm_volume: float,
    subtitle_style: SubtitleStyle | None,
    keep_original_audio: bool,
    skip_subtitle: bool = False,
):
    """Continue processing for jobs that were awaiting subtitle input.

    If a transcript exists and skip_subtitle is False, an ASS subtitle file
    is generated from it; otherwise the video is rendered with BGM only.
    Original audio is kept only when the job actually has audio. All
    outcomes are reported through job_store.
    """
    job = job_store.get_job(job_id)
    if not job or not job.video_path:
        job_store.update_job(job_id, status=JobStatus.FAILED, error="Job or video not found")
        return
    try:
        subtitle_path = None
        if not skip_subtitle and job.transcript:
            # Generate subtitle file from existing transcript
            job_store.update_job(job_id, status=JobStatus.PROCESSING, progress=60)
            style = subtitle_style or SubtitleStyle()
            subtitle_content = segments_to_ass(
                job.transcript,
                use_translated=True,
                font_size=style.font_size,
                font_color=style.font_color.lstrip("#"),  # ASS colors carry no '#'
                outline_color=style.outline_color.lstrip("#"),
                font_name=style.font_name,
                position=style.position,
                margin_v=style.margin_v,
                outline_width=style.outline_width,
                bold=style.bold,
                shadow=style.shadow,
                background_box=style.background_box,
                background_opacity=style.background_opacity,
                animation=style.animation,
                max_chars_per_line=style.max_chars_per_line,
            )
            job_dir = os.path.dirname(job.video_path)
            subtitle_path = os.path.join(job_dir, "subtitle.ass")
            with open(subtitle_path, "w", encoding="utf-8") as f:
                f.write(subtitle_content)
        else:
            # Skip subtitle - process with BGM only
            job_store.update_job(job_id, status=JobStatus.PROCESSING, progress=60)
        # Process video
        job_store.update_job(job_id, progress=75)
        # BGM lookup: .mp3 preferred, .wav as fallback.
        bgm_path = None
        if bgm_id:
            bgm_path = os.path.join(settings.BGM_DIR, f"{bgm_id}.mp3")
            if not os.path.exists(bgm_path):
                bgm_path = os.path.join(settings.BGM_DIR, f"{bgm_id}.wav")
        output_dir = os.path.join(settings.PROCESSED_DIR, job_id)
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "output.mp4")
        success, message = await process_video(
            input_path=job.video_path,
            output_path=output_path,
            subtitle_path=subtitle_path,
            bgm_path=bgm_path,
            bgm_volume=bgm_volume,
            keep_original_audio=keep_original_audio and job.has_audio,  # Only keep if has audio
        )
        if success:
            job_store.update_job(
                job_id,
                status=JobStatus.COMPLETED,
                output_path=output_path,
                progress=100,
            )
        else:
            job_store.update_job(job_id, status=JobStatus.FAILED, error=message)
    except Exception as e:
        # Unexpected failure: record the error and mark the job FAILED.
        job_store.update_job(job_id, status=JobStatus.FAILED, error=str(e))
@router.post("/{job_id}/continue")
async def continue_processing(
    job_id: str,
    request: ProcessRequest,
    background_tasks: BackgroundTasks,
):
    """Continue or reprocess a job (supports awaiting_subtitle and completed status)."""
    job = job_store.get_job(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")
    # Allowed states: awaiting_subtitle (no speech found) or completed (re-run).
    if job.status not in (JobStatus.AWAITING_SUBTITLE, JobStatus.COMPLETED):
        raise HTTPException(
            status_code=400,
            detail=f"Job status must be 'awaiting_subtitle' or 'completed', current: {job.status}"
        )
    background_tasks.add_task(
        continue_processing_task,
        job_id,
        request.bgm_id,
        request.bgm_volume,
        request.subtitle_style,
        request.keep_original_audio,
        skip_subtitle=not job.transcript,  # No transcript -> BGM-only render
    )
    is_rerun = job.status == JobStatus.COMPLETED
    mode = "reprocessing" if is_rerun else "no audio mode"
    return ProcessResponse(
        job_id=job_id,
        status=JobStatus.PROCESSING,
        message=f"Continuing processing ({mode})",
    )
@router.post("/{job_id}/manual-subtitle")
async def add_manual_subtitle(
    job_id: str,
    segments: list,
    background_tasks: BackgroundTasks,
):
    """Add manual subtitle segments and continue processing."""
    from app.models.schemas import TranscriptSegment

    job = job_store.get_job(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")
    if job.status != JobStatus.AWAITING_SUBTITLE:
        raise HTTPException(
            status_code=400,
            detail=f"Job status must be 'awaiting_subtitle', current: {job.status}"
        )
    # Validate the user-supplied segments and store them as the transcript.
    parsed = [TranscriptSegment(**entry) for entry in segments]
    job_store.update_job(job_id, transcript=parsed)
    return {"message": "Manual subtitles added", "segment_count": len(parsed)}
@router.get("/{job_id}/video-info", response_model=VideoInfoResponse)
async def get_job_video_info(job_id: str):
    """Get video information for trimming UI."""
    job = job_store.get_job(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")
    if not (job.video_path and os.path.exists(job.video_path)):
        raise HTTPException(status_code=400, detail="Video file not found")
    info = await get_video_info(job.video_path)
    if not info:
        # Full metadata unavailable; fall back to duration alone.
        duration = await get_video_duration(job.video_path)
        if duration is None:
            raise HTTPException(status_code=500, detail="Could not get video info")
        info = {"duration": duration}
    return VideoInfoResponse(
        duration=info.get("duration", 0),
        width=info.get("width"),
        height=info.get("height"),
    )
@router.get("/{job_id}/frame")
async def get_frame_at_timestamp(job_id: str, timestamp: float):
    """
    Extract a single frame at the specified timestamp.
    Returns the frame as a JPEG image.
    Used for precise trimming preview.

    Frames are written to the system temp directory under a name derived
    from the job id and timestamp; that deterministic name is reused as a
    cache so repeated scrubs to the same position skip re-extraction.
    """
    from fastapi.responses import FileResponse
    import tempfile
    job = job_store.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")
    if not job.video_path or not os.path.exists(job.video_path):
        raise HTTPException(status_code=400, detail="Video file not found")
    # Get video duration to validate timestamp
    duration = await get_video_duration(job.video_path)
    if duration is None:
        raise HTTPException(status_code=500, detail="Could not get video duration")
    # Clamp timestamp to [0, duration - 0.01] so the seek never overruns the file
    timestamp = max(0, min(timestamp, duration - 0.01))
    # Deterministic temp path doubles as a cache key
    temp_dir = tempfile.gettempdir()
    frame_path = os.path.join(temp_dir, f"frame_{job_id}_{timestamp:.3f}.jpg")
    # IMPROVEMENT: skip extraction when this exact frame was already produced
    # by a previous request (matches the Cache-Control intent below).
    if not os.path.exists(frame_path):
        success, message = await extract_frame(job.video_path, frame_path, timestamp)
        if not success:
            raise HTTPException(status_code=500, detail=message)
    return FileResponse(
        path=frame_path,
        media_type="image/jpeg",
        headers={
            "Cache-Control": "public, max-age=60",
        }
    )
@router.post("/{job_id}/trim", response_model=TrimResponse)
async def trim_job_video(
    job_id: str,
    request: TrimRequest,
    background_tasks: BackgroundTasks,
):
    """
    Trim video to specified time range and optionally reprocess.
    This creates a new trimmed video file, replacing the original.
    Use this to remove sections with unwanted content (like original subtitles).

    The time range is validated against the real duration first. The original
    file is swapped for the trimmed one via a rename-with-backup sequence;
    on success the backup is deleted and the job is reset (transcript and
    output cleared) so the manual workflow can restart from READY_FOR_TRIM.
    """
    job = job_store.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")
    if not job.video_path or not os.path.exists(job.video_path):
        raise HTTPException(status_code=400, detail="Video file not found")
    # Validate time range
    duration = await get_video_duration(job.video_path)
    if duration is None:
        raise HTTPException(status_code=500, detail="Could not get video duration")
    if request.start_time < 0 or request.end_time > duration:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid time range. Video duration is {duration:.1f}s"
        )
    if request.start_time >= request.end_time:
        raise HTTPException(
            status_code=400,
            detail="Start time must be less than end time"
        )
    # Update status
    job_store.update_job(job_id, status=JobStatus.TRIMMING, progress=5)
    # Create trimmed video path (same directory and extension as the source)
    video_dir = os.path.dirname(job.video_path)
    video_ext = os.path.splitext(job.video_path)[1]
    trimmed_path = os.path.join(video_dir, f"trimmed{video_ext}")
    # Convert exclude_regions to list of dicts for the function
    exclude_regions = None
    if request.exclude_regions:
        exclude_regions = [
            {'start': region.start, 'end': region.end}
            for region in request.exclude_regions
        ]
    # Perform trim
    success, message = await trim_video(
        job.video_path,
        trimmed_path,
        request.start_time,
        request.end_time,
        exclude_regions,
    )
    if not success:
        job_store.update_job(job_id, status=JobStatus.FAILED, error=message)
        return TrimResponse(
            job_id=job_id,
            success=False,
            message=message,
        )
    # Replace original with trimmed video
    original_path = job.video_path
    backup_path = os.path.join(video_dir, f"original_backup{video_ext}")
    try:
        # Backup original
        os.rename(original_path, backup_path)
        # Move trimmed to original location
        os.rename(trimmed_path, original_path)
        # Remove backup
        os.remove(backup_path)
    except Exception as e:
        job_store.update_job(job_id, status=JobStatus.FAILED, error=f"File operation failed: {e}")
        return TrimResponse(
            job_id=job_id,
            success=False,
            message=f"File operation failed: {e}",
        )
    # Get new duration
    new_duration = await get_video_duration(original_path)
    # Reset job state for next step
    job_store.update_job(
        job_id,
        status=JobStatus.READY_FOR_TRIM,  # Stay in trim-ready state for manual workflow
        progress=30,
        transcript=None,  # Clear old transcript
        output_path=None,  # Clear old output
        error=None,
    )
    # Optionally start reprocessing
    if request.reprocess:
        background_tasks.add_task(
            process_task,
            job_id,
            None,  # bgm_id
            0.3,  # bgm_volume
            None,  # subtitle_style
            True,  # keep_original_audio
            None,  # translation_mode
            False,  # use_vocal_separation
        )
        message = f"Video trimmed to {new_duration:.1f}s. Reprocessing started."
    else:
        message = f"Video trimmed to {new_duration:.1f}s. Ready for processing."
    return TrimResponse(
        job_id=job_id,
        success=True,
        message=message,
        new_duration=new_duration,
    )
# ============================================================
# Step-by-step Processing API (Manual Workflow)
# ============================================================
async def transcribe_step_task(
    job_id: str,
    translation_mode: str | None = None,
    use_vocal_separation: bool = False,
):
    """Background task for transcription step only (audio extraction + STT + translation).

    Runs the same first stages as process_task, then stops with the job in
    AWAITING_REVIEW so the user can review/edit the script before rendering.
    All outcomes are reported through job_store.
    """
    job = job_store.get_job(job_id)
    if not job or not job.video_path:
        job_store.update_job(job_id, status=JobStatus.FAILED, error="Job or video not found")
        return
    try:
        # Progress callback for real-time status updates
        async def progress_callback(step: str, progress: int):
            step_to_status = {
                "vocal_separation": JobStatus.EXTRACTING_AUDIO,
                "extracting_audio": JobStatus.EXTRACTING_AUDIO,
                "noise_reduction": JobStatus.NOISE_REDUCTION,
                "transcribing": JobStatus.TRANSCRIBING,
            }
            # Unknown step names fall back to TRANSCRIBING.
            status = step_to_status.get(step, JobStatus.TRANSCRIBING)
            print(f"[Progress] Step: {step} -> Status: {status}, Progress: {progress}%")
            job_store.update_job(job_id, status=status, progress=progress)
        # Start with initial status
        job_store.update_job(job_id, status=JobStatus.EXTRACTING_AUDIO, progress=10)
        success, message, segments, detected_lang = await transcribe_video(
            job.video_path,
            use_noise_reduction=True,
            noise_reduction_level="medium",
            use_vocal_separation=use_vocal_separation,
            progress_callback=progress_callback,
        )
        # Handle special cases (no usable speech -> wait for manual subtitles)
        if not success:
            if message in ("NO_AUDIO", "SILENT_AUDIO", "SINGING_ONLY"):
                if message == "NO_AUDIO":
                    audio_status = "no_audio_stream"
                elif message == "SILENT_AUDIO":
                    audio_status = "audio_silent"
                else:
                    audio_status = "singing_only"
                job_store.update_job(
                    job_id,
                    status=JobStatus.AWAITING_SUBTITLE,
                    progress=35,
                    has_audio=message != "NO_AUDIO",
                    audio_status=audio_status,
                )
                return
            else:
                job_store.update_job(job_id, status=JobStatus.FAILED, error=message)
                return
        # Audio OK - speech detected and transcribed
        job_store.update_job(job_id, transcript=segments, progress=50, has_audio=True, audio_status="ok", detected_language=detected_lang)
        # Translate/Rewrite (only for Chinese content)
        is_chinese = detected_lang and detected_lang.lower() in ['zh', 'zh-cn', 'zh-tw', 'chinese', 'mandarin']
        if is_chinese:
            job_store.update_job(job_id, status=JobStatus.TRANSLATING, progress=55)
            mode = translation_mode or settings.TRANSLATION_MODE
            success, message, segments = await translate_segments(
                segments,
                mode=mode,
                max_tokens=settings.TRANSLATION_MAX_TOKENS,
            )
            if not success:
                # Translation failure is non-fatal; the original text is kept.
                print(f"Translation warning: {message}")
            job_store.update_job(job_id, transcript=segments, progress=70)
        else:
            # Skip translation for non-Chinese content
            print(f"Skipping GPT translation for non-Chinese content (detected: {detected_lang})")
            for seg in segments:
                seg.translated = seg.text
            job_store.update_job(job_id, transcript=segments, progress=70)
        # STOP HERE - Set status to AWAITING_REVIEW for user to review script
        job_store.update_job(
            job_id,
            status=JobStatus.AWAITING_REVIEW,
            progress=70,
        )
    except Exception as e:
        # Unexpected failure: record the error and mark the job FAILED.
        job_store.update_job(job_id, status=JobStatus.FAILED, error=str(e))
async def render_step_task(
    job_id: str,
    bgm_id: str | None,
    bgm_volume: float,
    subtitle_style: SubtitleStyle | None,
    keep_original_audio: bool,
    intro_text: str | None = None,
    intro_duration: float = 0.7,
    intro_font_size: int = 100,
    intro_position: str = "center",
):
    """Background task for final video rendering (subtitle composition + BGM + intro text).

    If the job has a transcript, an ASS subtitle file is generated from it;
    when intro_text is set, subtitle timings are shifted by intro_duration
    so the intro overlay and the first subtitle do not overlap. All
    outcomes are reported through job_store.
    """
    job = job_store.get_job(job_id)
    if not job or not job.video_path:
        job_store.update_job(job_id, status=JobStatus.FAILED, error="Job or video not found")
        return
    try:
        subtitle_path = None
        if job.transcript:
            # Generate subtitle file from transcript
            job_store.update_job(job_id, status=JobStatus.PROCESSING, progress=75)
            style = subtitle_style or SubtitleStyle()
            # When intro text is shown, delay subtitles so they don't overlap
            subtitle_offset = intro_duration if intro_text else 0.0
            subtitle_content = segments_to_ass(
                job.transcript,
                use_translated=True,
                font_size=style.font_size,
                font_color=style.font_color.lstrip("#"),  # ASS colors carry no '#'
                outline_color=style.outline_color.lstrip("#"),
                font_name=style.font_name,
                position=style.position,
                margin_v=style.margin_v,
                outline_width=style.outline_width,
                bold=style.bold,
                shadow=style.shadow,
                background_box=style.background_box,
                background_opacity=style.background_opacity,
                animation=style.animation,
                time_offset=subtitle_offset,
                max_chars_per_line=style.max_chars_per_line,
            )
            job_dir = os.path.dirname(job.video_path)
            subtitle_path = os.path.join(job_dir, "subtitle.ass")
            with open(subtitle_path, "w", encoding="utf-8") as f:
                f.write(subtitle_content)
        else:
            # No subtitle - BGM only mode
            job_store.update_job(job_id, status=JobStatus.PROCESSING, progress=75)
        # Determine BGM path (.mp3 preferred, .wav as fallback)
        bgm_path = None
        if bgm_id:
            bgm_path = os.path.join(settings.BGM_DIR, f"{bgm_id}.mp3")
            if not os.path.exists(bgm_path):
                bgm_path = os.path.join(settings.BGM_DIR, f"{bgm_id}.wav")
        # Output path
        output_dir = os.path.join(settings.PROCESSED_DIR, job_id)
        os.makedirs(output_dir, exist_ok=True)
        output_path = os.path.join(output_dir, "output.mp4")
        job_store.update_job(job_id, progress=80)
        success, message = await process_video(
            input_path=job.video_path,
            output_path=output_path,
            subtitle_path=subtitle_path,
            bgm_path=bgm_path,
            bgm_volume=bgm_volume,
            keep_original_audio=keep_original_audio,
            intro_text=intro_text,
            intro_duration=intro_duration,
            intro_font_size=intro_font_size,
            intro_position=intro_position,
        )
        if success:
            job_store.update_job(
                job_id,
                status=JobStatus.COMPLETED,
                output_path=output_path,
                progress=100,
            )
        else:
            job_store.update_job(job_id, status=JobStatus.FAILED, error=message)
    except Exception as e:
        # Unexpected failure: record the error and mark the job FAILED.
        job_store.update_job(job_id, status=JobStatus.FAILED, error=str(e))
@router.post("/{job_id}/start-transcription", response_model=ProcessResponse)
async def start_transcription(
    job_id: str,
    request: TranscribeRequest,
    background_tasks: BackgroundTasks,
):
    """
    Start transcription step (audio extraction + STT + translation).
    After completion, job status will be 'awaiting_review' for user to review the script.
    """
    job = job_store.get_job(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")
    if not job.video_path:
        raise HTTPException(status_code=400, detail="Video not downloaded yet")
    # Transcription may begin right after download (pending) or after the trim step.
    if job.status not in (JobStatus.READY_FOR_TRIM, JobStatus.PENDING):
        raise HTTPException(
            status_code=400,
            detail=f"Job status must be 'ready_for_trim' or 'pending', current: {job.status}"
        )
    background_tasks.add_task(
        transcribe_step_task,
        job_id,
        request.translation_mode,
        request.use_vocal_separation,
    )
    return ProcessResponse(
        job_id=job_id,
        status=JobStatus.EXTRACTING_AUDIO,
        message="Transcription started. Script will be ready for review."
    )
@router.post("/{job_id}/render", response_model=ProcessResponse)
async def render_video(
    job_id: str,
    request: RenderRequest,
    background_tasks: BackgroundTasks,
):
    """
    Render final video with subtitles and BGM.
    Call this after reviewing and editing the script.
    """
    job = job_store.get_job(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")
    if not job.video_path:
        raise HTTPException(status_code=400, detail="Video not found")
    # Valid entry points: after script review, after manual subtitles, or re-render.
    if job.status not in (JobStatus.AWAITING_REVIEW, JobStatus.AWAITING_SUBTITLE, JobStatus.COMPLETED):
        raise HTTPException(
            status_code=400,
            detail=f"Job status must be 'awaiting_review', 'awaiting_subtitle', or 'completed', current: {job.status}"
        )
    background_tasks.add_task(
        render_step_task,
        job_id,
        request.bgm_id,
        request.bgm_volume,
        request.subtitle_style,
        request.keep_original_audio,
        request.intro_text,
        request.intro_duration,
        request.intro_font_size,
        request.intro_position,
    )
    return ProcessResponse(
        job_id=job_id,
        status=JobStatus.PROCESSING,
        message="Final video rendering started."
    )
@router.post("/{job_id}/retranslate")
async def retranslate(job_id: str, background_tasks: BackgroundTasks):
    """
    Re-run GPT translation on existing transcript segments.
    Useful when user wants to regenerate the Korean script.

    The heavy work runs after the response via FastAPI's BackgroundTasks.
    """
    import logging
    logger = logging.getLogger(__name__)
    job = job_store.get_job(job_id)
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")
    if not job.transcript:
        raise HTTPException(status_code=400, detail="No transcript found. Run transcription first.")
    # Copy segments to avoid reference issues
    segments_copy = [seg.model_copy() for seg in job.transcript]
    logger.info(f"[Retranslate] Starting for job {job_id} with {len(segments_copy)} segments")

    async def retranslate_task():
        try:
            logger.info(f"[Retranslate] Background task started for job {job_id}")
            job_store.update_job(job_id, status=JobStatus.TRANSLATING, progress=55)
            # Reset translations so translate_segments regenerates all of them
            for seg in segments_copy:
                seg.translated = None
            mode = settings.TRANSLATION_MODE
            logger.info(f"[Retranslate] Using mode: {mode}")
            success, message, translated_segments = await translate_segments(
                segments_copy,
                mode=mode,
                max_tokens=settings.TRANSLATION_MAX_TOKENS,
            )
            logger.info(f"[Retranslate] Translation result: success={success}, message={message}")
            if success:
                job_store.update_job(
                    job_id,
                    transcript=translated_segments,
                    status=JobStatus.AWAITING_REVIEW,
                    progress=70,
                )
                logger.info(f"[Retranslate] Job {job_id} updated with new translation")
            else:
                job_store.update_job(job_id, status=JobStatus.FAILED, error=message)
                logger.error(f"[Retranslate] Translation failed: {message}")
        except Exception as e:
            logger.exception(f"[Retranslate] Exception in retranslate_task: {e}")
            job_store.update_job(job_id, status=JobStatus.FAILED, error=str(e))

    # BUG FIX: the original called asyncio.create_task() without keeping a
    # reference; per the asyncio docs such tasks may be garbage-collected
    # before finishing. BackgroundTasks (the previously-unused parameter)
    # supports async callables, runs them after the response, and holds the
    # reference for the task's lifetime.
    background_tasks.add_task(retranslate_task)
    return {"message": "Re-translation started", "job_id": job_id}
@router.post("/{job_id}/skip-trim", response_model=ProcessResponse)
async def skip_trim(job_id: str):
    """
    Skip the trimming step and proceed to transcription.
    Updates status from 'ready_for_trim' to 'pending'.
    """
    job = job_store.get_job(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")
    if job.status != JobStatus.READY_FOR_TRIM:
        raise HTTPException(
            status_code=400,
            detail=f"Job status must be 'ready_for_trim', current: {job.status}"
        )
    # Pure workflow-state transition; no media processing happens here.
    job_store.update_job(job_id, status=JobStatus.PENDING)
    return ProcessResponse(
        job_id=job_id,
        status=JobStatus.PENDING,
        message="Trim skipped. Ready for transcription.",
    )
# ============================================================
# Thumbnail Generation API
# ============================================================
@router.get("/{job_id}/thumbnail-timestamps")
async def get_thumbnail_timestamps(job_id: str, count: int = 5):
    """
    Get suggested timestamps for thumbnail frame selection.
    Returns evenly distributed timestamps from the video.
    """
    job = job_store.get_job(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")
    if not (job.video_path and os.path.exists(job.video_path)):
        raise HTTPException(status_code=400, detail="Video file not found")
    suggestions = await get_video_timestamps(job.video_path, count)
    return {
        "job_id": job_id,
        "timestamps": suggestions,
        "count": len(suggestions),
    }
@router.post("/{job_id}/generate-catchphrase")
async def api_generate_catchphrase(job_id: str, style: str = "homeshopping"):
    """
    Generate a catchy thumbnail text using GPT.
    Available styles:
    - homeshopping: home-shopping style (strong sales appeal)
    - viral: viral style (curiosity-driven)
    - informative: informative style (clear delivery)
    """
    job = job_store.get_job(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")
    if not job.transcript:
        raise HTTPException(status_code=400, detail="No transcript found. Run transcription first.")
    success, message, catchphrase = await generate_catchphrase(job.transcript, style)
    if not success:
        # On failure, message carries the error description.
        raise HTTPException(status_code=500, detail=message)
    return {
        "job_id": job_id,
        "catchphrase": catchphrase,
        "style": style,
    }
@router.post("/{job_id}/thumbnail")
async def create_thumbnail(
    job_id: str,
    timestamp: float = 2.0,
    style: str = "homeshopping",
    custom_text: str | None = None,
    font_size: int = 80,
    position: str = "center",
):
    """
    Generate a thumbnail with text overlay.
    Args:
        timestamp: Time in seconds to extract frame (default: 2.0)
        style: Catchphrase style (homeshopping, viral, informative)
        custom_text: Custom text to use (skip GPT generation if provided)
        font_size: Font size for text overlay (default: 80)
        position: Text position (top, center, bottom)
    Returns:
        Thumbnail file path and the text used
    """
    job = job_store.get_job(job_id)
    if job is None:
        raise HTTPException(status_code=404, detail="Job not found")
    if not (job.video_path and os.path.exists(job.video_path)):
        raise HTTPException(status_code=400, detail="Video file not found")
    success, message, thumbnail_path = await generate_thumbnail(
        job_id=job_id,
        video_path=job.video_path,
        transcript=job.transcript or [],  # Tolerate jobs without a transcript
        timestamp=timestamp,
        style=style,
        custom_text=custom_text,
        font_size=font_size,
        position=position,
    )
    if not success:
        raise HTTPException(status_code=500, detail=message)
    # generate_thumbnail embeds the overlay text in its message; strip the prefix.
    overlay_text = message.replace("Thumbnail generated: ", "")
    return {
        "job_id": job_id,
        "thumbnail_path": thumbnail_path,
        "thumbnail_url": f"/api/jobs/{job_id}/thumbnail",
        "text": overlay_text,
        "timestamp": timestamp,
    }