"""Enhanced audio services for a conversational voice bot.

Speech recognition uses Groq Whisper ASR; speech synthesis uses Murf TTS with
edge-tts as a fallback. Based on a friend's implementation with Murf TTS and
Groq ASR.
"""

import asyncio
import logging
import re
from typing import Optional

from groq import AsyncGroq

from config import GROQ_API_KEY, MURF_API_KEY

logger = logging.getLogger(__name__)

# Initialize Groq client
groq_client = AsyncGroq(api_key=GROQ_API_KEY)

# Initialize Murf client. The package is optional: when it is missing the
# client name is still bound (to None) so later references cannot NameError.
try:
    from murf import AsyncMurf

    murf_client = AsyncMurf(api_key=MURF_API_KEY)
    MURF_AVAILABLE = True
except ImportError:
    logger.warning("Murf package not available. Install with: pip install murf")
    murf_client = None
    MURF_AVAILABLE = False

# Voice used for the edge-tts fallback. edge-tts short voice names carry the
# "Neural" suffix; the previous value "en-IN-Neerja" is rejected by
# edge_tts.Communicate, which made the fallback always return empty audio.
EDGE_FALLBACK_VOICE = "en-IN-NeerjaNeural"

# Patterns used by clean_markdown_for_tts, compiled once at import time.
# Line-anchored patterns use [ \t] rather than \s so they never consume
# newlines (and with them the blank line that marks a paragraph break).
_MD_LINK = re.compile(r'\[([^\]]+)\]\([^\)]+\)')
_MD_BOLD = re.compile(r'\*\*([^\*]+)\*\*')
_MD_ITALIC = re.compile(r'\*([^\*]+)\*')
_MD_HEADER = re.compile(r'^#+[ \t]*', re.MULTILINE)
_MD_FENCED_CODE = re.compile(r'```[^`]*```')
_MD_INLINE_CODE = re.compile(r'`([^`]+)`')
# Only tag-like spans and HTML comments, so "2 < 3 and 5 > 4" is left alone.
_HTML_TAG = re.compile(r'<!--.*?-->|</?[A-Za-z][^>]*>', re.DOTALL)
# A list marker must be followed by whitespace, so "-5", "+1" and "3.14" at
# the start of a line are not mistaken for list items.
_MD_BULLET = re.compile(r'^[ \t]*[-*+][ \t]+', re.MULTILINE)
_MD_NUMBERED = re.compile(r'^[ \t]*\d+\.[ \t]+', re.MULTILINE)
_PARAGRAPH_AFTER_PUNCT = re.compile(r'([.!?:;,])[ \t]*\n\s*\n')
_PARAGRAPH_BREAK = re.compile(r'\n\s*\n')
_WHITESPACE_RUN = re.compile(r'\s+')
_TTS_UNFRIENDLY_CHARS = re.compile(r'[#\*_~`]')


def clean_markdown_for_tts(text: str) -> str:
    """Strip markdown and other formatting so the text reads well as speech.

    Links are reduced to their label, emphasis/heading/list markers are
    dropped, fenced code blocks are removed entirely, inline code keeps its
    content, and HTML tags are removed. Paragraph breaks become a sentence
    pause (". ") unless the paragraph already ends in punctuation, and all
    remaining whitespace is collapsed to single spaces.

    Args:
        text: Raw text, possibly containing markdown or HTML.

    Returns:
        The cleaned text, or "" when ``text`` is empty or nothing remains.
    """
    if not text:
        return ""

    # Remove markdown links [text](url) -> text
    text = _MD_LINK.sub(r'\1', text)

    # Remove markdown emphasis **bold** and *italic* -> text
    text = _MD_BOLD.sub(r'\1', text)
    text = _MD_ITALIC.sub(r'\1', text)

    # Remove markdown header markers
    text = _MD_HEADER.sub('', text)

    # Drop fenced code blocks entirely; keep the content of inline code
    text = _MD_FENCED_CODE.sub('', text)
    text = _MD_INLINE_CODE.sub(r'\1', text)

    # Remove HTML tags
    text = _HTML_TAG.sub('', text)

    # Clean up bullet points and numbered lists
    text = _MD_BULLET.sub('', text)
    text = _MD_NUMBERED.sub('', text)

    # Trim first so leading/trailing blank lines do not turn into a stray ". "
    text = text.strip()

    # Paragraph breaks become a pause; avoid doubling existing punctuation
    text = _PARAGRAPH_AFTER_PUNCT.sub(r'\1 ', text)
    text = _PARAGRAPH_BREAK.sub('. ', text)
    text = _WHITESPACE_RUN.sub(' ', text)

    # Remove special characters that TTS might struggle with
    text = _TTS_UNFRIENDLY_CHARS.sub('', text)

    return text.strip()


async def groq_asr_bytes(audio_bytes: bytes, model: str = "whisper-large-v3", language: str = "en") -> str:
    """Transcribe audio using Groq Whisper ASR.

    Args:
        audio_bytes: Audio payload; it is uploaded under the name
            "audio.wav" with content type "audio/wav".
        model: Groq transcription model name.
        language: Language code passed to the transcription API.

    Returns:
        The stripped transcription, or "" when the input is empty or the
        request fails (failures are logged, not raised).
    """
    if not audio_bytes:
        # Nothing to transcribe; skip the network round trip.
        logger.warning("⚠️ Empty audio provided to ASR")
        return ""

    try:
        logger.info(f"🎤 Transcribing audio with Groq ASR (model: {model}, language: {language})")

        # Groq client is async, so we can use it directly
        response = await groq_client.audio.transcriptions.create(
            model=model,
            file=("audio.wav", audio_bytes, "audio/wav"),
            response_format="text",
            language=language,
            temperature=0.0,  # For more consistent results
        )

        # With response_format="text" a plain string is expected; tolerate a
        # response object exposing `.text` instead of failing on .strip().
        raw_text = response if isinstance(response, str) else getattr(response, "text", "")
        transcription = raw_text.strip() if raw_text else ""
        logger.info(f"🎯 Transcription result: {transcription}")
        return transcription

    except Exception as e:
        # Best-effort boundary: callers receive "" rather than an exception.
        logger.error(f"❌ Groq ASR failed: {e}")
        return ""


async def murf_tts(text: str, voice_id: str = "en-IN-isha", format: str = "MP3") -> bytes:
    """Convert text to speech using Murf TTS.

    The text is cleaned with ``clean_markdown_for_tts`` before synthesis and
    the streamed audio chunks are joined into one bytes object.

    Args:
        text: Text to speak; may contain markdown.
        voice_id: Murf voice identifier.
        format: Audio format passed to Murf.

    Returns:
        The audio bytes, or b"" when Murf is unavailable, the text is empty
        (before or after cleaning), or the request fails.
    """
    if not MURF_AVAILABLE:
        logger.error("❌ Murf TTS not available")
        return b""

    if not text or not text.strip():
        logger.warning("⚠️ Empty text provided to TTS")
        return b""

    try:
        # Clean text for TTS
        clean_text = clean_markdown_for_tts(text)

        if not clean_text:
            logger.warning("⚠️ Text became empty after cleaning")
            return b""

        logger.info(f"🔊 Generating speech with Murf TTS (voice: {voice_id})")
        logger.debug(f"TTS text: {clean_text}")

        # Generate speech using async streaming
        response = murf_client.text_to_speech.stream(
            text=clean_text,
            voice_id=voice_id,
            format=format,
            sample_rate=44100.0,
        )

        # Collect all chunks
        chunks = [chunk async for chunk in response]
        full_audio = b''.join(chunks)

        logger.info(f"✅ Generated {len(full_audio)} bytes of audio")
        return full_audio

    except Exception as e:
        # Best-effort boundary: callers use b"" as the signal to fall back.
        logger.error(f"❌ Murf TTS failed: {e}")
        return b""


async def edge_tts_fallback(text: str, voice: str = EDGE_FALLBACK_VOICE) -> bytes:
    """Fallback TTS using edge-tts if Murf is not available.

    Args:
        text: Text to speak; cleaned with ``clean_markdown_for_tts`` first.
        voice: edge-tts voice name, e.g. "en-IN-NeerjaNeural".

    Returns:
        The audio bytes, or b"" when the cleaned text is empty, edge-tts is
        not installed, or synthesis fails.
    """
    try:
        # Imported lazily so the module still loads without edge-tts installed.
        import edge_tts

        clean_text = clean_markdown_for_tts(text)
        if not clean_text:
            return b""

        logger.info(f"🔊 Using Edge TTS fallback (voice: {voice})")

        communicate = edge_tts.Communicate(clean_text, voice)
        audio_chunks = []

        # The stream yields several chunk types; only "audio" carries bytes.
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                audio_chunks.append(chunk["data"])

        audio_data = b"".join(audio_chunks)
        logger.info(f"✅ Generated {len(audio_data)} bytes of audio with Edge TTS")
        return audio_data

    except ImportError:
        logger.error("❌ Edge TTS not available. Install with: pip install edge-tts")
        return b""
    except Exception as e:
        logger.error(f"❌ Edge TTS failed: {e}")
        return b""


class ConversationalAudioService:
    """Main audio service class for the conversational voice bot.

    Wraps the module-level ASR and TTS helpers and remembers a default voice.
    """

    def __init__(self):
        self.groq_client = groq_client
        self.murf_available = MURF_AVAILABLE
        self.default_voice = "en-IN-isha"  # Indian English voice

    async def transcribe_audio(self, audio_bytes: bytes, language: str = "en") -> str:
        """Transcribe audio to text using Groq ASR."""
        return await groq_asr_bytes(audio_bytes, language=language)

    async def synthesize_speech(self, text: str, voice_id: Optional[str] = None) -> bytes:
        """Convert text to speech using the best available TTS.

        Murf is tried first when available; if it is unavailable or returns
        no audio, edge-tts is used with ``EDGE_FALLBACK_VOICE``.

        Args:
            text: Text to speak.
            voice_id: Murf voice to use; defaults to ``self.default_voice``.

        Returns:
            The audio bytes, or b"" when every backend fails.
        """
        voice = voice_id or self.default_voice

        if self.murf_available:
            # Try Murf TTS first
            audio = await murf_tts(text, voice_id=voice)
            if audio:
                return audio

        # Fallback to Edge TTS. Murf voice ids are not passed on; the
        # fallback always uses the fixed Edge voice.
        return await edge_tts_fallback(text, voice=EDGE_FALLBACK_VOICE)

    def set_default_voice(self, voice_id: str):
        """Set default voice for TTS."""
        self.default_voice = voice_id
        logger.info(f"🎵 Default voice set to: {voice_id}")


# Global audio service instance
conversational_audio_service = ConversationalAudioService()