import re
from typing import List, Tuple, Optional

from openai import OpenAI

from app.models.schemas import TranscriptSegment
from app.config import settings


def get_openai_client() -> OpenAI:
    """Get an OpenAI client configured with the project API key."""
    return OpenAI(api_key=settings.OPENAI_API_KEY)


class TranslationMode:
    """Translation mode options."""
    DIRECT = "direct"        # direct translation (keep original structure)
    SUMMARIZE = "summarize"  # summarize, then translate
    REWRITE = "rewrite"      # summarize + rewrite as a Korean script


async def shorten_text(client: OpenAI, text: str, max_chars: int) -> str:
    """
    Shorten a Korean text to fit within a character limit.

    Uses the chat model to compress the subtitle; falls back to plain
    truncation if the API call fails.

    Args:
        client: OpenAI client
        text: Text to shorten
        max_chars: Maximum character count

    Returns:
        Shortened text (best effort; never longer than max_chars on the
        API path, truncated with an ellipsis on the fallback path)
    """
    try:
        response = client.chat.completions.create(
            model=settings.OPENAI_MODEL,
            messages=[
                {
                    "role": "system",
                    "content": f"""한국어 자막을 {max_chars}자 이내로 줄이세요.

규칙:
- 반드시 {max_chars}자 이하!
- 핵심 의미만 유지
- 자연스러운 한국어
- 존댓말 유지
- 출력은 줄인 문장만!

예시:
입력: "요리할 때마다 한 시간이 걸리셨죠?" (18자)
제한: 10자
출력: "시간 오래 걸리죠" (8자)

입력: "채소 다듬는 데만 30분 걸리셨죠" (16자)
제한: 10자
출력: "채소만 30분" (6자)"""
                },
                {
                    "role": "user",
                    "content": f"입력: \"{text}\" ({len(text)}자)\n제한: {max_chars}자\n출력:"
                }
            ],
            temperature=0.3,
            max_tokens=50,
        )
        # Guard against a None content payload before string processing.
        shortened = (response.choices[0].message.content or "").strip()

        # Remove surrounding quotes (straight and curly variants).
        shortened = shortened.strip('"\'“”‘’')
        # Remove any trailing parenthetical length note like "(10자)".
        # NOTE: a single pattern suffices — "\(\d+자\)" is a subset of this one.
        shortened = re.sub(r'\s*\([^)]*자\)\s*$', '', shortened)
        # Remove any remaining embedded quotes.
        shortened = shortened.replace('"', '').replace('“', '').replace('”', '')
        shortened = shortened.replace("'", '').replace('‘', '').replace('’', '')
        shortened = shortened.strip()

        # If the model still overshot the limit, truncate cleanly.
        if len(shortened) > max_chars:
            shortened = shortened[:max_chars]

        return shortened

    except Exception:
        # Fallback: simple truncation with an ellipsis.
        if len(text) > max_chars:
            return text[:max_chars - 1] + "…"
        return text


async def translate_segments(
    segments: List[TranscriptSegment],
    target_language: str = "Korean",
    mode: str = TranslationMode.DIRECT,
    max_tokens: Optional[int] = None,
) -> Tuple[bool, str, List[TranscriptSegment]]:
    """
    Translate transcript segments to the target language using OpenAI.

    Segments are translated in a single batched request and the results are
    written back onto each segment's ``translated`` attribute in place.

    Args:
        segments: List of transcript segments
        target_language: Target language for translation
            (NOTE(review): currently unused — the prompts hard-code Korean;
            kept for interface compatibility)
        mode: Translation mode (direct, summarize, rewrite)
        max_tokens: Maximum output tokens (for cost control)

    Returns:
        Tuple of (success, message, translated_segments)
    """
    if not settings.OPENAI_API_KEY:
        return False, "OpenAI API key not configured", segments

    try:
        client = get_openai_client()

        # Batch translate for efficiency.
        texts = [seg.text for seg in segments]
        combined_text = "\n---\n".join(texts)

        # Per-segment length guidance so subtitles stay in sync with audio.
        segment_info = []
        for i, seg in enumerate(segments):
            duration = seg.end - seg.start
            # ~5 Korean chars per second (stricter for better sync).
            max_chars = int(duration * 5)
            segment_info.append(f"[{i+1}] {duration:.1f}초 = 최대 {max_chars}자 (엄수!)")

        # Custom prompt settings from config, with sensible defaults.
        gpt_role = settings.GPT_ROLE or "친근한 유튜브 쇼츠 자막 작가"
        gpt_tone = settings.GPT_TONE or "존댓말"
        gpt_style = settings.GPT_STYLE or ""

        # Example endings for each speech register.
        tone_examples = {
            "존댓말": '~해요, ~이에요, ~하죠',
            "반말": '~해, ~야, ~지',
            "격식체": '~합니다, ~입니다',
        }
        tone_example = tone_examples.get(gpt_tone, tone_examples["존댓말"])

        # Optional extra style instruction appended to the prompt.
        style_instruction = f"\n6. Style: {gpt_style}" if gpt_style else ""

        # Select prompt based on mode.
        if mode == TranslationMode.REWRITE:
            # Build indexed timeline input with the Chinese text.
            # Segment numbers handle duplicate timestamps.
            timeline_input = []
            for i, seg in enumerate(segments):
                mins = int(seg.start // 60)
                secs = int(seg.start % 60)
                timeline_input.append(f"[{i+1}] {mins}:{secs:02d} {seg.text}")

            system_prompt = f"""당신은 생활용품 유튜브 쇼츠 자막 작가입니다.
중국어 원문의 "의미"만 참고하여, 한국인이 직접 말하는 것처럼 자연스러운 자막을 작성하세요.

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
🎯 핵심 원칙: 번역이 아니라 "재창작"
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

✅ 필수 규칙:
1. 한 문장 = 한 가지 정보 (두 개 이상 금지)
2. 중복 표현 절대 금지 ("편해요"가 이미 나왔으면 다시 안 씀)
3. {gpt_tone} 사용 ({tone_example})
4. 세그먼트 수 유지: 입력 {len(segments)}개 → 출력 {len(segments)}개
5. 중국어 한자 금지, 순수 한글만

❌ 금지 표현 (번역투):
- "~할 수 있어요" → "~돼요", "~됩니다"
- "매우/아주/정말" 남용 → 꼭 필요할 때만
- "그것은/이것은" → "이거", "이건"
- "~하는 것이" → 직접 표현으로
- "편리해요/편해요" 반복 → 한 번만, 이후 다른 표현
- "좋아요/좋고요" 반복 → 구체적 장점으로 대체

🎵 쇼츠 리듬감:
- 짧게 끊어서
- 한 호흡에 하나씩
- 시청자가 따라 읽을 수 있게

📝 좋은 예시:

원문: "이 작은 박스 디자인이 참 좋네요. 평소에 씨앗 먹을 때 간편하게 먹을 수 있어요."
❌ 나쁜 번역: "이 작은 박스 디자인이 참 좋네요. 평소에 씨앗 먹을 때 간편하게 먹을 수 있어요."
✅ 좋은 재창작: "이 작은 박스, 생각보다 정말 잘 만들었어요."

원문: "테이블에 두거나 손에 들고 사용하기에도 좋고요. 침대에 누워서나 사무실에서도 간식이나 과일 먹기 정말 편해요."
❌ 나쁜 번역: "테이블에 두거나 손에 들고 사용하기에도 좋고요. 침대에 누워서나 사무실에서도 간식이나 과일 먹기 정말 편해요."
✅ 좋은 재창작 (2개로 분리):
- "테이블 위에서도, 침대에서도, 사무실에서도 사용하기 좋고"
- "과일 씻고 물기 빼는 데도 활용 가능합니다."

원문: "가정에서 필수 아이템이에요. 정말 유용하죠. 꼭 하나씩 가져야 할 제품이에요."
❌ 나쁜 번역: 그대로 3문장
✅ 좋은 재창작: "집에 하나 있으면 은근히 자주 쓰게 됩니다."{style_instruction}

출력 형식:
[번호] 시간 자막 내용

⚠️ 입력과 동일한 세그먼트 수({len(segments)}개)를 출력하세요!
⚠️ 각 [번호]는 입력과 1:1 대응해야 합니다!"""

            # Use the indexed timeline format for the user content.
            combined_text = "[중국어 원문]\n\n" + "\n".join(timeline_input)

        elif mode == TranslationMode.SUMMARIZE:
            system_prompt = f"""You are: {gpt_role}
Task: Translate Chinese to SHORT Korean subtitles.

Length limits (자막 싱크!):
{chr(10).join(segment_info)}

Rules:
1. Use {gpt_tone} ({tone_example})
2. Summarize to core meaning - be BRIEF
3. Max one short sentence per segment
4. {len(segments)} segments separated by '---'{style_instruction}"""

        else:  # DIRECT mode
            system_prompt = f"""You are: {gpt_role}
Task: Translate Chinese to Korean subtitles.

Length limits (자막 싱크!):
{chr(10).join(segment_info)}

Rules:
1. Use {gpt_tone} ({tone_example})
2. Keep translations SHORT and readable
3. {len(segments)} segments separated by '---'{style_instruction}"""

        # Build the API request.
        request_params = {
            "model": settings.OPENAI_MODEL,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": combined_text}
            ],
            # Higher temperature for creative rewriting; low for translation.
            "temperature": 0.65 if mode == TranslationMode.REWRITE else 0.3,
        }

        # Add max_tokens if specified (for cost control).
        effective_max_tokens = max_tokens or settings.TRANSLATION_MAX_TOKENS
        if effective_max_tokens:
            if mode == TranslationMode.REWRITE:
                # REWRITE needs headroom for the indexed timeline output.
                request_params["max_tokens"] = max(effective_max_tokens, 700)
            else:
                request_params["max_tokens"] = effective_max_tokens

        response = client.chat.completions.create(**request_params)
        # Guard against a None content payload before parsing.
        translated_text = response.choices[0].message.content or ""

        # Parse the response based on mode.
        if mode == TranslationMode.REWRITE:
            # Parse indexed timeline format: "[1] 0:00 자막\n[2] 0:02 자막\n..."
            indexed_pattern = re.compile(r'^\[(\d+)\]\s*\d+:\d{2}\s+(.+)$', re.MULTILINE)
            matches = indexed_pattern.findall(translated_text)

            # Map segment index -> translation text.
            translations_by_index = {}
            for idx, text in matches:
                translations_by_index[int(idx)] = text.strip()

            # Map translations back onto segments by 1-based index.
            for i, seg in enumerate(segments):
                seg_num = i + 1
                if seg_num in translations_by_index:
                    seg.translated = translations_by_index[seg_num]
                else:
                    # No matching indexed translation for this segment.
                    seg.translated = ""

            # Fallback: if no indexed matches at all, try the old
            # timestamp-based format ("0:00 자막").
            if not matches:
                print("[Warning] No indexed format found, falling back to timestamp parsing")
                timeline_pattern = re.compile(r'^(\d+):(\d{2})\s+(.+)$', re.MULTILINE)
                timestamp_matches = timeline_pattern.findall(translated_text)

                # Map start-time (seconds) -> translation text.
                translations_by_time = {}
                for mins, secs, text in timestamp_matches:
                    time_sec = int(mins) * 60 + int(secs)
                    translations_by_time[time_sec] = text.strip()

                # Track used translations to prevent duplicates.
                used_translations = set()

                # Match each segment's start time to an unused translation.
                for seg in segments:
                    start_sec = int(seg.start)
                    matched_time = None

                    # Try exact match first.
                    if start_sec in translations_by_time and start_sec not in used_translations:
                        matched_time = start_sec
                    else:
                        # Try the closest UNUSED match within 1 second.
                        for t in range(start_sec - 1, start_sec + 2):
                            if t in translations_by_time and t not in used_translations:
                                matched_time = t
                                break

                    if matched_time is not None:
                        seg.translated = translations_by_time[matched_time]
                        used_translations.add(matched_time)
                    else:
                        seg.translated = ""
        else:
            # Original "---"-separated parsing for the other modes.
            translated_parts = translated_text.split("---")
            for i, seg in enumerate(segments):
                if i < len(translated_parts):
                    seg.translated = translated_parts[i].strip()
                else:
                    seg.translated = seg.text  # fall back to original text

        # Token usage for logging.
        usage = response.usage
        token_info = f"(tokens: {usage.prompt_tokens}+{usage.completion_tokens}={usage.total_tokens})"

        # Post-processing: shorten segments that exceed the character limit.
        # Skipped for REWRITE mode — the prompt handles length naturally.
        shortened_count = 0
        if mode != TranslationMode.REWRITE:
            chars_per_sec = 5
            for i, seg in enumerate(segments):
                if seg.translated:
                    duration = seg.end - seg.start
                    max_chars = int(duration * chars_per_sec)
                    current_len = len(seg.translated)
                    # Only shorten clear overruns (30%+) on segments long
                    # enough for a meaningful rewrite.
                    if current_len > max_chars * 1.3 and max_chars >= 5:
                        seg.translated = await shorten_text(client, seg.translated, max_chars)
                        shortened_count += 1
                        print(f"[Shorten] Seg {i+1}: {current_len}→{len(seg.translated)}자 (제한:{max_chars}자)")

        shorten_info = f" [축약:{shortened_count}개]" if shortened_count > 0 else ""
        return True, f"Translation complete [{mode}] {token_info}{shorten_info}", segments

    except Exception as e:
        return False, f"Translation error: {str(e)}", segments


async def generate_shorts_script(
    segments: List[TranscriptSegment],
    style: str = "engaging",
    max_tokens: int = 500,
) -> Tuple[bool, str, Optional[str]]:
    """
    Generate a completely new Korean Shorts script from a Chinese transcript.

    Args:
        segments: Original transcript segments
        style: Script style (engaging, informative, funny, dramatic)
        max_tokens: Maximum output tokens

    Returns:
        Tuple of (success, message, script)
    """
    if not settings.OPENAI_API_KEY:
        return False, "OpenAI API key not configured", None

    try:
        client = get_openai_client()

        # Combine all segment text into one transcript.
        full_text = " ".join([seg.text for seg in segments])
        total_duration = segments[-1].end if segments else 0

        style_guides = {
            "engaging": "Use hooks, questions, and emotional expressions. Start with attention-grabbing line.",
            "informative": "Focus on facts and clear explanations. Use simple, direct language.",
            "funny": "Add humor, wordplay, and light-hearted tone. Include relatable jokes.",
            "dramatic": "Build tension and suspense. Use impactful short sentences.",
        }
        style_guide = style_guides.get(style, style_guides["engaging"])

        system_prompt = f"""You are a viral Korean YouTube Shorts script writer.
Create a COMPLETELY ORIGINAL Korean script inspired by the Chinese video content.

=== CRITICAL: ANTI-PLAGIARISM RULES ===
- This is NOT translation - it's ORIGINAL CONTENT CREATION
- NEVER copy sentence structures, word order, or phrasing from original
- Extract only the CORE IDEA, then write YOUR OWN script from scratch
- Imagine you're a Korean creator who just learned this interesting fact
- Add your own personality, reactions, and Korean cultural context
=======================================

Video duration: ~{int(total_duration)} seconds
Style: {style}
Guide: {style_guide}

Output format:
[0:00] 첫 번째 대사
[0:03] 두 번째 대사
...

Requirements:
- Write in POLITE FORMAL KOREAN (존댓말/경어) - friendly but respectful
- Each line: 2-3 seconds when spoken aloud
- Start with a HOOK that grabs attention
- Use polite Korean expressions: "이거 아세요?", "정말 신기하죠", "근데 여기서 중요한 건요"
- End with engagement: question, call-to-action, or surprise
- Make it feel like ORIGINAL Korean content, not a translation"""

        response = client.chat.completions.create(
            model=settings.OPENAI_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Chinese transcript:\n{full_text}"}
            ],
            temperature=0.7,
            max_tokens=max_tokens,
        )

        script = response.choices[0].message.content
        usage = response.usage
        token_info = f"(tokens: {usage.total_tokens})"

        return True, f"Script generated [{style}] {token_info}", script

    except Exception as e:
        return False, f"Script generation error: {str(e)}", None


async def translate_single(
    text: str,
    target_language: str = "Korean",
    max_tokens: Optional[int] = None,
) -> Tuple[bool, str]:
    """
    Translate a single text.

    Args:
        text: Text to translate
        target_language: Target language name inserted into the prompt
        max_tokens: Optional output token cap

    Returns:
        Tuple of (success, translated_text); on failure the original text
        is returned unchanged.
    """
    if not settings.OPENAI_API_KEY:
        return False, text

    try:
        client = get_openai_client()

        request_params = {
            "model": settings.OPENAI_MODEL,
            "messages": [
                {
                    "role": "system",
                    "content": f"Translate to {target_language}. Only output the translation, nothing else."
                },
                {
                    "role": "user",
                    "content": text
                }
            ],
            "temperature": 0.3,
        }
        if max_tokens:
            request_params["max_tokens"] = max_tokens

        response = client.chat.completions.create(**request_params)
        # Guard against a None content payload before stripping.
        translated = response.choices[0].message.content or ""

        return True, translated.strip()

    except Exception:
        return False, text