mmtts / app.py
aungkomyat's picture
Update app.py
f1d74e2 verified
raw
history blame
4.51 kB
import os
import sys
import gradio as gr
import numpy as np
import torch
import subprocess
import shutil
from pathlib import Path
# Model repository information
# Git URL that setup_environment() clones when REPO_DIR is missing.
REPO_URL = "https://github.com/hpbyte/myanmar-tts.git"
# Directory where synthesize_speech() looks for checkpoint_latest.pth.tar
# and hparams.yml.
MODEL_DIR = "trained_model"
# Local checkout directory of the repository; it is added to sys.path so the
# repository's modules can be imported.
REPO_DIR = "myanmar-tts"
# Fetch the TTS repository if needed and prepare the local directories
def setup_environment():
    """Clone the TTS repository when absent and prepare local directories.

    Steps, in order:
      1. Clone ``REPO_URL`` into ``REPO_DIR`` if ``REPO_DIR`` does not exist.
      2. Append the absolute path of ``REPO_DIR`` to ``sys.path`` (only once).
      3. Create ``MODEL_DIR`` if it does not exist.

    Returns:
        str: Newline-separated log of the steps that were actually performed,
        always ending with ``"Environment setup complete"``.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a non-zero
            status (``check=True``).
    """
    steps = []

    # Clone the repository if it doesn't exist
    if not os.path.exists(REPO_DIR):
        steps.append("Cloning repository...")
        # Pass the destination explicitly so the checkout always lands in the
        # directory tested above, instead of relying on the URL's basename
        # happening to equal REPO_DIR.
        subprocess.run(["git", "clone", REPO_URL, REPO_DIR], check=True)

    # Add the repository to Python path
    repo_path = os.path.abspath(REPO_DIR)
    if repo_path not in sys.path:
        sys.path.append(repo_path)
        steps.append(f"Added {repo_path} to Python path")

    # Create model directory if it doesn't exist
    if not os.path.exists(MODEL_DIR):
        # exist_ok avoids a crash if the directory appears between the check
        # and the creation.
        os.makedirs(MODEL_DIR, exist_ok=True)
        steps.append(f"Created {MODEL_DIR} directory")

    steps.append("Environment setup complete")
    return "\n".join(steps)
# Function to synthesize speech
def synthesize_speech(text):
    """Synthesize speech for *text* and write it to ``output.wav``.

    The model and hyperparameters are loaded from ``MODEL_DIR`` on every call.

    Args:
        text: Input text; converted to a symbol sequence with the
            ``burmese_cleaners`` cleaner.

    Returns:
        tuple: ``("output.wav", "Speech generated successfully!")`` on success;
        ``(None, message)`` when the model files are missing or when any step
        raises an exception (the exception text is included in the message).
    """
    try:
        # Import necessary modules from the repository.
        # Guarded so repeated requests do not keep appending the same entry
        # to sys.path.
        if REPO_DIR not in sys.path:
            sys.path.append(REPO_DIR)
        from myanmar_tts.text import text_to_sequence
        from myanmar_tts.utils.hparams import create_hparams
        from myanmar_tts.train import load_model
        from myanmar_tts.synthesis import generate_speech
        import scipy.io.wavfile

        # Check if model exists, if not provide instructions
        checkpoint_path = os.path.join(MODEL_DIR, "checkpoint_latest.pth.tar")
        config_path = os.path.join(MODEL_DIR, "hparams.yml")
        if not (os.path.exists(checkpoint_path) and os.path.exists(config_path)):
            return None, f"""Model files not found. Please upload:
1. The checkpoint file at: {checkpoint_path}
2. The hparams.yml file at: {config_path}
You can obtain these files from the original repository or by training the model."""

        # Load the model and hyperparameters
        hparams = create_hparams(config_path)
        model = load_model(hparams)
        # NOTE(security): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['state_dict'])
        model.eval()

        # Process text input: add a leading batch axis and convert to int64.
        # torch.from_numpy already yields a CPU tensor, so no Variable/.cpu()
        # wrapping is needed.
        token_ids = np.array(text_to_sequence(text, ['burmese_cleaners']))[None, :]
        sequence = torch.from_numpy(token_ids).long()

        # Run the whole inference path without autograd tracking; previously
        # only the waveform step was inside no_grad.
        with torch.no_grad():
            # Generate mel spectrograms (only the post-net output is used)
            _, mel_outputs_postnet, _, _ = model.inference(sequence)
            # Generate waveform
            waveform = generate_speech(mel_outputs_postnet, hparams)

        # Save and return the audio
        output_path = "output.wav"
        scipy.io.wavfile.write(output_path, hparams.sampling_rate, waveform)
        return output_path, "Speech generated successfully!"
    except Exception as e:
        # Top-level boundary for the UI: report the failure as a status
        # message instead of raising into Gradio.
        return None, f"Error: {e}\n\nMake sure you have uploaded the model files to the {MODEL_DIR} directory."
# Function for the Gradio interface
def tts_interface(text):
    """Validate the textbox input and delegate to ``synthesize_speech``.

    Args:
        text: Text submitted from the UI. ``None``, an empty string, or a
            whitespace-only string is rejected without running synthesis.

    Returns:
        tuple: ``(None, "Please enter some text.")`` for missing/blank input;
        otherwise whatever ``synthesize_speech(text)`` returns.
    """
    # Guard against None as well as blank strings: calling .strip() on None
    # would raise AttributeError instead of showing the prompt.
    if text is None or not text.strip():
        return None, "Please enter some text."
    return synthesize_speech(text)
# Set up the environment
# Runs when the module is loaded, before the interface below is built;
# the returned status log is printed to stdout.
setup_message = setup_environment()
print(setup_message)
# Build the Gradio interface from named parts.
# Components are created in the same order as before: input textbox, audio
# output, status textbox.
_text_input = gr.Textbox(
    lines=3,
    placeholder="Enter Burmese text here...",
    label="Text",
)
_audio_output = gr.Audio(label="Generated Speech")
_status_output = gr.Textbox(label="Status")

# Text shown under the title of the demo page.
_description = """
This is a demo of the Myanmar Text-to-Speech system developed by hpbyte.
Enter Burmese text in the box below and click 'Submit' to generate speech.
**Note:** You need to upload the model files to the 'trained_model' directory:
- checkpoint_latest.pth.tar
- hparams.yml
GitHub Repository: https://github.com/hpbyte/myanmar-tts
"""

# Sample inputs offered in the UI; one single-field row per example.
_examples = [
    ["မင်္ဂလာပါ"],
    ["မြန်မာစကားပြောစနစ်ကို ကြိုဆိုပါတယ်"],
    ["ဒီစနစ်ဟာ မြန်မာစာကို အသံအဖြစ် ပြောင်းပေးနိုင်ပါတယ်"],
]

demo = gr.Interface(
    fn=tts_interface,
    inputs=[_text_input],
    outputs=[_audio_output, _status_output],
    title="Myanmar (Burmese) Text-to-Speech",
    description=_description,
    examples=_examples,
)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()