Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -136,7 +136,7 @@ def process_transcript(language: str, audio_path: str) -> str:
|
|
| 136 |
# Transcription process
|
| 137 |
try:
|
| 138 |
for path in list_audio_path:
|
| 139 |
-
inputs = processor.
|
| 140 |
audio=path, model_id=model_name)
|
| 141 |
inputs = inputs.to(device, dtype=torch.bfloat16)
|
| 142 |
outputs = model.generate(**inputs, max_new_tokens=MAX_TOKENS)
|
|
@@ -246,7 +246,7 @@ def voice_extract_demucs():
|
|
| 246 |
]
|
| 247 |
subprocess.run(cmd, check=True)
|
| 248 |
voice_path = os.path.join("demucs", "htdemucs", "audio_file", "vocals.wav")
|
| 249 |
-
success_message = "✅ **Success!** Voice extracted."
|
| 250 |
return voice_path, voice_path, gr.Markdown(success_message)
|
| 251 |
except Exception as e:
|
| 252 |
return None, None, gr.Markdown(f"❌ **Error:** An unexpected ERROR occurred: {e}")
|
|
@@ -404,31 +404,13 @@ def secure_download_youtube_audio(url: str):
|
|
| 404 |
return None, None, gr.Markdown(f"❌ **Error:** An unexpected ERROR occurred: {e}")
|
| 405 |
###
|
| 406 |
|
| 407 |
-
def voice_extract_demucs():
|
| 408 |
-
"""
|
| 409 |
-
Returns the path of the voice extracted file.
|
| 410 |
-
"""
|
| 411 |
-
try:
|
| 412 |
-
cmd = [
|
| 413 |
-
"demucs",
|
| 414 |
-
"--two-stems=vocals",
|
| 415 |
-
"--out", "demucs",
|
| 416 |
-
"audio_file.wav"
|
| 417 |
-
]
|
| 418 |
-
subprocess.run(cmd, check=True)
|
| 419 |
-
voice_path = os.path.join("demucs", "htdemucs", "audio_file", "vocals.wav")
|
| 420 |
-
success_message = "✅ **Success!** Voice extracted."
|
| 421 |
-
return voice_path, voice_path, gr.Markdown(success_message)
|
| 422 |
-
except Exception as e:
|
| 423 |
-
return None, None, gr.Markdown(f"❌ **Error:** An unexpected ERROR occurred: {e}")
|
| 424 |
-
###
|
| 425 |
|
| 426 |
def clear_audio():
|
| 427 |
return None, None, None, None
|
| 428 |
###
|
| 429 |
|
| 430 |
def get_sel_audio(audio_path: str) -> str:
|
| 431 |
-
return audio_path
|
| 432 |
###
|
| 433 |
|
| 434 |
#### Gradio interface
|
|
@@ -471,11 +453,11 @@ with gr.Blocks(title="Voxtral") as voxtral:
|
|
| 471 |
)
|
| 472 |
status_output1 = gr.Markdown()
|
| 473 |
with gr.Row():
|
| 474 |
-
voice_button0 = gr.Button("Process original audio")
|
| 475 |
voice_button0.click(
|
| 476 |
fn=get_sel_audio,
|
| 477 |
inputs=sel_audio1,
|
| 478 |
-
outputs=[sel_audio])
|
| 479 |
voice_button1 = gr.Button("Extract voice (if noisy environment)")
|
| 480 |
voice_button1.click(
|
| 481 |
fn=voice_extract_demucs,
|
|
|
|
| 136 |
# Transcription process
|
| 137 |
try:
|
| 138 |
for path in list_audio_path:
|
| 139 |
+
inputs = processor.apply_transcription_request(language=id_language,
|
| 140 |
audio=path, model_id=model_name)
|
| 141 |
inputs = inputs.to(device, dtype=torch.bfloat16)
|
| 142 |
outputs = model.generate(**inputs, max_new_tokens=MAX_TOKENS)
|
|
|
|
| 246 |
]
|
| 247 |
subprocess.run(cmd, check=True)
|
| 248 |
voice_path = os.path.join("demucs", "htdemucs", "audio_file", "vocals.wav")
|
| 249 |
+
success_message = "✅ **Success!** Voice extracted. ("+voice_path+")"
|
| 250 |
return voice_path, voice_path, gr.Markdown(success_message)
|
| 251 |
except Exception as e:
|
| 252 |
return None, None, gr.Markdown(f"❌ **Error:** An unexpected ERROR occurred: {e}")
|
|
|
|
| 404 |
return None, None, gr.Markdown(f"❌ **Error:** An unexpected ERROR occurred: {e}")
|
| 405 |
###
|
| 406 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
|
| 408 |
def clear_audio():
|
| 409 |
return None, None, None, None
|
| 410 |
###
|
| 411 |
|
| 412 |
def get_sel_audio(audio_path: str) -> str:
|
| 413 |
+
return audio_path, gr.Markdown("✅ **Original** audio is considered.")
|
| 414 |
###
|
| 415 |
|
| 416 |
#### Gradio interface
|
|
|
|
| 453 |
)
|
| 454 |
status_output1 = gr.Markdown()
|
| 455 |
with gr.Row():
|
| 456 |
+
voice_button0 = gr.Button("Process original audio", variant="primary")
|
| 457 |
voice_button0.click(
|
| 458 |
fn=get_sel_audio,
|
| 459 |
inputs=sel_audio1,
|
| 460 |
+
outputs=[sel_audio, status_output1])
|
| 461 |
voice_button1 = gr.Button("Extract voice (if noisy environment)")
|
| 462 |
voice_button1.click(
|
| 463 |
fn=voice_extract_demucs,
|