AJ50 committed on
Commit
c222fbc
·
1 Parent(s): abfe39d

Fix noisy audio: Use WaveRNN vocoder instead of Griffin-Lim + add normalization

Browse files
Files changed (1) hide show
  1. backend/app/voice_cloning.py +17 -3
backend/app/voice_cloning.py CHANGED
@@ -75,13 +75,27 @@ def synthesize(voice_path: Path, text: str, models_dir: Path, out_path: Path) ->
75
  mels = synthesizer.synthesize_spectrograms([text], [embed])
76
  mel = mels[0]
77
 
78
- print("[VoiceCloning] Vocoding waveform...")
79
  try:
80
- waveform = synthesizer.griffin_lim(mel).astype(np.float32)
81
- except Exception:
82
  waveform = vocoder_infer.infer_waveform(
83
  mel, normalize=True, batched=False, target=8000, overlap=800
84
  ).astype(np.float32)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  out_path.parent.mkdir(parents=True, exist_ok=True)
87
  sf.write(out_path.as_posix(), waveform, syn_hp.sample_rate)
 
75
  mels = synthesizer.synthesize_spectrograms([text], [embed])
76
  mel = mels[0]
77
 
78
+ print("[VoiceCloning] Vocoding waveform with WaveRNN...")
79
  try:
80
+ # Use the high-quality WaveRNN vocoder (much better than Griffin-Lim)
 
81
  waveform = vocoder_infer.infer_waveform(
82
  mel, normalize=True, batched=False, target=8000, overlap=800
83
  ).astype(np.float32)
84
+ except Exception as e:
85
+ print(f"[VoiceCloning] Vocoder failed: {e}, falling back to Griffin-Lim...")
86
+ waveform = synthesizer.griffin_lim(mel).astype(np.float32)
87
+
88
+ # Normalize waveform to prevent clipping and ensure good volume
89
+ max_abs_value = np.max(np.abs(waveform))
90
+ if max_abs_value > 0:
91
+ # Target peak level at -3 dB (0.707 of full scale; waveform is float32 in [-1.0, 1.0])
92
+ target_level = 0.707
93
+ waveform = waveform * (target_level / max_abs_value)
94
+
95
+ # Ensure waveform is in valid range for 16-bit audio
96
+ waveform = np.clip(waveform, -1.0, 1.0)
97
+
98
+ print(f"[VoiceCloning] Waveform normalized - Max: {np.max(np.abs(waveform)):.4f}")
99
 
100
  out_path.parent.mkdir(parents=True, exist_ok=True)
101
  sf.write(out_path.as_posix(), waveform, syn_hp.sample_rate)