Spaces:

Loren
/

Voxtral_Mini_Evaluation

Running on Zero

App Files Community

Loren committed on Sep 6

Commit

89ac223

verified ·

1 Parent(s): 84f022b

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -23

app.py CHANGED Viewed

@@ -136,7 +136,7 @@ def process_transcript(language: str, audio_path: str) -> str:
         # Transcription process
         try:
             for path in list_audio_path:
-                inputs = processor.apply_transcrition_request(language=id_language,
                                                               audio=path, model_id=model_name)
                 inputs = inputs.to(device, dtype=torch.bfloat16)
                 outputs = model.generate(**inputs, max_new_tokens=MAX_TOKENS)
@@ -246,7 +246,7 @@ def voice_extract_demucs():
         ]
         subprocess.run(cmd, check=True)
         voice_path = os.path.join("demucs", "htdemucs", "audio_file", "vocals.wav")
-        success_message = "✅ **Success!** Voice extracted."
         return voice_path, voice_path, gr.Markdown(success_message)
     except Exception as e:
         return None, None, gr.Markdown(f"❌ **Error:** An unexpected ERROR occurred: {e}")
@@ -404,31 +404,13 @@ def secure_download_youtube_audio(url: str):
         return None, None, gr.Markdown(f"❌ **Error:** An unexpected ERROR occurred: {e}")
 ###
-def voice_extract_demucs():
-    """
-    Returns the path of the voice extracted file.
-    """
-    try:
-        cmd = [
-            "demucs",
-            "--two-stems=vocals",
-            "--out", "demucs",
-            "audio_file.wav"
-        ]
-        subprocess.run(cmd, check=True)
-        voice_path = os.path.join("demucs", "htdemucs", "audio_file", "vocals.wav")
-        success_message = "✅ **Success!** Voice extracted."
-        return voice_path, voice_path, gr.Markdown(success_message)
-    except Exception as e:
-        return None, None, gr.Markdown(f"❌ **Error:** An unexpected ERROR occurred: {e}")
-###
 def clear_audio():
     return None, None, None, None
 ###
 def get_sel_audio(audio_path: str) -> str:
-    return audio_path
 ###
 #### Gradio interface
@@ -471,11 +453,11 @@ with gr.Blocks(title="Voxtral") as voxtral:
                 )
                 status_output1 = gr.Markdown()
                 with gr.Row():
-                    voice_button0 = gr.Button("Process original audio")
                     voice_button0.click(
                         fn=get_sel_audio,
                         inputs=sel_audio1,
-                        outputs=[sel_audio])
                     voice_button1 = gr.Button("Extract voice (if noisy environment)")
                     voice_button1.click(
                         fn=voice_extract_demucs,

         # Transcription process
         try:
             for path in list_audio_path:
+                inputs = processor.apply_transcription_request(language=id_language,
                                                               audio=path, model_id=model_name)
                 inputs = inputs.to(device, dtype=torch.bfloat16)
                 outputs = model.generate(**inputs, max_new_tokens=MAX_TOKENS)
         ]
         subprocess.run(cmd, check=True)
         voice_path = os.path.join("demucs", "htdemucs", "audio_file", "vocals.wav")
+        success_message = "✅ **Success!** Voice extracted. ("+voice_path+")"
         return voice_path, voice_path, gr.Markdown(success_message)
     except Exception as e:
         return None, None, gr.Markdown(f"❌ **Error:** An unexpected ERROR occurred: {e}")
         return None, None, gr.Markdown(f"❌ **Error:** An unexpected ERROR occurred: {e}")
 ###
 def clear_audio():
     return None, None, None, None
 ###
 def get_sel_audio(audio_path: str) -> str:
+    return audio_path, gr.Markdown("✅ **Original** audio is considered.")
 ###
 #### Gradio interface
                 )
                 status_output1 = gr.Markdown()
                 with gr.Row():
+                    voice_button0 = gr.Button("Process original audio", variant="primary")
                     voice_button0.click(
                         fn=get_sel_audio,
                         inputs=sel_audio1,
+                        outputs=[sel_audio, status_output1])
                     voice_button1 = gr.Button("Extract voice (if noisy environment)")
                     voice_button1.click(
                         fn=voice_extract_demucs,