Spaces:
Sleeping
Sleeping
Commit
·
fbf0654
1
Parent(s):
31bcc87
🔧 Add FFmpeg system dependency and enhance ASR error handling
Browse files
- Add packages.txt with ffmpeg dependency for Hugging Face Spaces
- Enhanced voice_service.py to check FFmpeg availability at startup
- Add graceful fallback to browser-native ASR when FFmpeg is missing
- Improve error handling for audio processing dependencies
- Add subprocess check for FFmpeg availability before Whisper initialization
- packages.txt +1 -0
- voice_service.py +20 -2
packages.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
ffmpeg
|
voice_service.py
CHANGED
|
@@ -57,10 +57,20 @@ class VoiceService:
|
|
| 57 |
if self.asr_provider == "whisper":
|
| 58 |
try:
|
| 59 |
import whisper
|
|
|
|
|
|
|
| 60 |
# Verify it's the correct whisper package
|
| 61 |
if not hasattr(whisper, 'load_model'):
|
| 62 |
raise ImportError("Wrong whisper package - need openai-whisper")
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
# Use base model for balance between speed and accuracy
|
| 65 |
self.whisper_model = whisper.load_model("base")
|
| 66 |
self.asr_available = True
|
|
@@ -215,8 +225,16 @@ class VoiceService:
|
|
| 215 |
return "Server-side ASR not available - using browser ASR"
|
| 216 |
|
| 217 |
except Exception as e:
|
| 218 |
-
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
def get_available_voices(self) -> Dict[str, Any]:
|
| 222 |
"""Get list of available TTS voices"""
|
|
|
|
| 57 |
if self.asr_provider == "whisper":
|
| 58 |
try:
|
| 59 |
import whisper
|
| 60 |
+
import subprocess
|
| 61 |
+
|
| 62 |
# Verify it's the correct whisper package
|
| 63 |
if not hasattr(whisper, 'load_model'):
|
| 64 |
raise ImportError("Wrong whisper package - need openai-whisper")
|
| 65 |
|
| 66 |
+
# Check if FFmpeg is available
|
| 67 |
+
try:
|
| 68 |
+
subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
|
| 69 |
+
logger.info("✅ FFmpeg is available")
|
| 70 |
+
except (subprocess.CalledProcessError, FileNotFoundError):
|
| 71 |
+
logger.warning("⚠️ FFmpeg not found - Whisper may not work properly")
|
| 72 |
+
raise ImportError("FFmpeg not available")
|
| 73 |
+
|
| 74 |
# Use base model for balance between speed and accuracy
|
| 75 |
self.whisper_model = whisper.load_model("base")
|
| 76 |
self.asr_available = True
|
|
|
|
| 225 |
return "Server-side ASR not available - using browser ASR"
|
| 226 |
|
| 227 |
except Exception as e:
|
| 228 |
+
error_msg = str(e).lower()
|
| 229 |
+
if 'ffmpeg' in error_msg or 'no such file or directory' in error_msg:
|
| 230 |
+
logger.error(f"❌ FFmpeg Error: {e}")
|
| 231 |
+
logger.info("🔄 FFmpeg not available - switching to browser-native ASR")
|
| 232 |
+
# Update provider to browser-native for future requests
|
| 233 |
+
self.asr_provider = "browser-native"
|
| 234 |
+
return None
|
| 235 |
+
else:
|
| 236 |
+
logger.error(f"❌ ASR Error: {e}")
|
| 237 |
+
return None
|
| 238 |
|
| 239 |
def get_available_voices(self) -> Dict[str, Any]:
|
| 240 |
"""Get list of available TTS voices"""
|