"""Multilingual song processing - English and Hindi support."""

import gc
import torch
import numpy as np
from pathlib import Path
from typing import Optional
import sys

from app.song_conversion.vocal_separator import VocalSeparator
from app.song_conversion.audio_mixer import AudioMixer
from app.multilingual_tts import MultilingualTTSService, Language


class MultilingualSongProcessor:
    """
    Orchestrates song voice conversion for multiple languages.
    
    - English songs: Uses WaveRNN voice cloning
    - Hindi songs: Uses XTTS Hindi model
    """
    
    def __init__(self, models_dir: Path, hindi_model_dir: Optional[Path] = None):
        """
        Initialize multilingual song processor.
        
        Args:
            models_dir: Directory with English models
            hindi_model_dir: Directory with Hindi XTTS model
        """
        self.models_dir = Path(models_dir)
        self.hindi_model_dir = Path(hindi_model_dir) if hindi_model_dir else None
        self.separator = None
        self.tts_service = None
        self.sr = 16000
    
    def _ensure_separator(self) -> VocalSeparator:
        """Lazy load vocal separator."""
        if self.separator is None:
            print("[MultilingualSongProcessor] Initializing vocal separator...")
            self.separator = VocalSeparator(model_name="htdemucs")
        return self.separator
    
    def _ensure_tts_service(self) -> MultilingualTTSService:
        """Lazy load TTS service."""
        if self.tts_service is None:
            print("[MultilingualSongProcessor] Initializing multilingual TTS service...")
            self.tts_service = MultilingualTTSService(
                models_dir=self.models_dir,
                hindi_model_dir=self.hindi_model_dir
            )
        return self.tts_service
    
    def _extract_lyrics_from_audio(self, audio_path: Path) -> str:
        """
        Extract lyrics from audio (placeholder).
        In production, would use Whisper with language detection.
        
        Args:
            audio_path: Path to audio file
            
        Returns:
            Extracted or placeholder lyrics
        """
        print("[MultilingualSongProcessor] Extracting lyrics from audio...")
        
        # Placeholder: return generic phonetically rich text
        # In production, use: whisper_model.transcribe(str(audio_path), language='en'/'hi')
        lyrics = "The music is playing so well with this song today"
        
        print(f"[MultilingualSongProcessor] Using default lyrics: {lyrics}")
        return lyrics
    
    def convert_song(self, song_path: Path, voice_path: Path, output_path: Path,
                    language: str = 'english', add_effects: bool = True) -> Path:
        """
        Convert song to user's voice (multilingual support).
        
        Pipeline:
        1. Separate vocals from instrumental (Demucs)
        2. Extract lyrics (placeholder or Whisper)
        3. Synthesize with user's voice (language-aware)
        4. Mix synthesized vocals with instrumental
        5. Add audio effects
        
        Args:
            song_path: Path to input song
            voice_path: Path to reference voice sample
            output_path: Path for output song
            language: 'english' or 'hindi'
            add_effects: Whether to add reverb/compression
            
        Returns:
            Path to output song
        """
        song_path = Path(song_path)
        voice_path = Path(voice_path)
        output_path = Path(output_path)
        language = language.lower()
        
        try:
            print(f"\n[MultilingualSongProcessor] ========== SONG CONVERSION START ==========")
            print(f"[MultilingualSongProcessor] Language: {language.upper()}")
            print(f"[MultilingualSongProcessor] Song: {song_path}")
            print(f"[MultilingualSongProcessor] Voice: {voice_path}")
            print(f"[MultilingualSongProcessor] Output: {output_path}")
            
            # Step 1: Separate vocals
            print(f"\n[MultilingualSongProcessor] STEP 1: Separating vocals...")
            separator = self._ensure_separator()
            vocals, instrumental = separator.separate(song_path, sr=self.sr)
            
            # Step 2: Extract/prepare lyrics
            print(f"\n[MultilingualSongProcessor] STEP 2: Preparing lyrics...")
            lyrics = self._extract_lyrics_from_audio(song_path)
            
            # Step 3-4: Synthesize and mix using multilingual TTS
            print(f"\n[MultilingualSongProcessor] STEP 3-4: Synthesizing vocals with {language.upper()} model...")
            tts_service = self._ensure_tts_service()
            
            try:
                synthesized_vocal = tts_service.synthesize(lyrics, voice_path, language)
            except Exception as e:
                print(f"[MultilingualSongProcessor] Synthesis error: {e}")
                raise
            
            # Resample if needed (XTTS uses 24kHz, we need 16kHz for mixing)
            if len(synthesized_vocal.shape) > 1:
                synthesized_vocal = np.mean(synthesized_vocal, axis=1)
            
            if language == Language.HINDI.value:
                # XTTS uses 24kHz, resample to 16kHz for consistency
                from scipy import signal
                num_samples = int(len(synthesized_vocal) * (self.sr / 24000))
                synthesized_vocal = signal.resample(synthesized_vocal, num_samples)
            
            synthesized_vocal = synthesized_vocal.astype(np.float32)
            print(f"[MultilingualSongProcessor] Synthesized vocal shape: {synthesized_vocal.shape}")
            
            # Step 5: Mix with instrumental
            print(f"\n[MultilingualSongProcessor] STEP 5: Mixing vocals with instrumental...")
            final_audio = AudioMixer.mix_and_save(
                synthesized_vocal, instrumental,
                output_path, sr=self.sr,
                add_effects=add_effects
            )
            
            # Cleanup
            print(f"\n[MultilingualSongProcessor] Cleaning up models...")
            try:
                gc.collect()
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
            except Exception as e:
                print(f"[MultilingualSongProcessor] Warning during cleanup: {e}")
            
            print(f"\n[MultilingualSongProcessor] ========== SONG CONVERSION COMPLETE ==========")
            print(f"[MultilingualSongProcessor] Output saved to: {final_audio}")
            
            return final_audio
            
        except Exception as e:
            print(f"\n[MultilingualSongProcessor] ✗ ERROR: {e}")
            import traceback
            traceback.print_exc()
            sys.stdout.flush()
            raise