Spaces:

aungkomyat
/

mmtts

Configuration error

File size: 4,509 Bytes

20b1fe0
f1d74e2
c5e2260
20b1fe0
 
f1d74e2
 
 
c5e2260
f1d74e2
 
20b1fe0
f1d74e2
c5e2260
f1d74e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20b1fe0
 
f1d74e2
20b1fe0
f1d74e2
c5e2260
f1d74e2
 
20b1fe0
f1d74e2
 
 
 
 
 
 
 
 
 
 
20b1fe0
f1d74e2
 
 
 
 
 
 
 
 
20b1fe0
f1d74e2
20b1fe0
 
f1d74e2
20b1fe0
 
 
f1d74e2
20b1fe0
 
f1d74e2
20b1fe0
 
 
f1d74e2
 
 
20b1fe0
f1d74e2
 
20b1fe0
f1d74e2
c5e2260
f1d74e2
20b1fe0
 
 
 
f1d74e2
c5e2260
f1d74e2
 
 
c5e2260
f1d74e2
20b1fe0
 
 
 
 
 
 
 
 
 
 
 
c5e2260
20b1fe0
 
 
 
 
f1d74e2
 
 
 
20b1fe0
 
 
 
 
 
 
c5e2260
 
20b1fe0
c5e2260
20b1fe0

import os
import sys
import gradio as gr
import numpy as np
import torch
import subprocess
import shutil
from pathlib import Path

# Model repository information
REPO_URL = "https://github.com/hpbyte/myanmar-tts.git"
MODEL_DIR = "trained_model"
REPO_DIR = "myanmar-tts"

# Prepare the runtime: clone the TTS repository, add it to sys.path, and create the model directory
def setup_environment():
    """Prepare the working directory so the TTS code and model can be used.

    Performs three steps, each skipped when already satisfied:

    1. Clone ``REPO_URL`` into ``REPO_DIR`` if that directory is missing.
    2. Append the absolute path of ``REPO_DIR`` to ``sys.path`` if absent.
    3. Create ``MODEL_DIR`` if it does not exist.

    Returns:
        A newline-separated log of the steps that were actually performed,
        always ending with ``"Environment setup complete"``.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits non-zero.
        FileNotFoundError: If the ``git`` executable cannot be found.
    """
    steps = []

    # Clone the repository if it doesn't exist
    if not os.path.exists(REPO_DIR):
        steps.append("Cloning repository...")
        # Name the destination explicitly: without it git derives the
        # directory from the URL, which only matches REPO_DIR by coincidence.
        subprocess.run(["git", "clone", REPO_URL, REPO_DIR], check=True)

    # Add the repository to Python path
    repo_path = os.path.abspath(REPO_DIR)
    if repo_path not in sys.path:
        sys.path.append(repo_path)
        steps.append(f"Added {repo_path} to Python path")

    # Create model directory if it doesn't exist
    if not os.path.exists(MODEL_DIR):
        # exist_ok tolerates the directory appearing between check and create.
        os.makedirs(MODEL_DIR, exist_ok=True)
        steps.append(f"Created {MODEL_DIR} directory")

    steps.append("Environment setup complete")
    return "\n".join(steps)

# Function to synthesize speech
def synthesize_speech(text):
    """Synthesize speech for ``text`` and write it to ``output.wav``.

    The model and hyperparameters are loaded from ``MODEL_DIR`` on every
    call, and inference runs on the CPU.

    Args:
        text: Text to synthesize; it is converted with ``text_to_sequence``
            using the ``burmese_cleaners`` cleaner.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is
        ``"output.wav"`` on success, and ``None`` when the model files are
        missing or any step raised an exception (the exception text is
        included in ``status_message``).
    """
    try:
        # Make the cloned repository importable. Use the absolute path and
        # skip it when already present so repeated requests do not keep
        # growing sys.path with duplicate entries.
        repo_path = os.path.abspath(REPO_DIR)
        if repo_path not in sys.path:
            sys.path.append(repo_path)

        # Import necessary modules from the repository
        from myanmar_tts.text import text_to_sequence
        from myanmar_tts.utils.hparams import create_hparams
        from myanmar_tts.train import load_model
        from myanmar_tts.synthesis import generate_speech
        import scipy.io.wavfile

        # Check if model exists, if not provide instructions
        checkpoint_path = os.path.join(MODEL_DIR, "checkpoint_latest.pth.tar")
        config_path = os.path.join(MODEL_DIR, "hparams.yml")

        if not os.path.exists(checkpoint_path) or not os.path.exists(config_path):
            return None, f"""Model files not found. Please upload:
1. The checkpoint file at: {checkpoint_path}
2. The hparams.yml file at: {config_path}

You can obtain these files from the original repository or by training the model."""

        # Load the model and hyperparameters
        hparams = create_hparams(config_path)
        model = load_model(hparams)
        # NOTE(review): depending on the installed torch version and its
        # weights_only default, torch.load may unpickle arbitrary objects.
        # Only load checkpoints from a trusted source.
        checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['state_dict'])
        model.eval()

        # Process text input: add a leading batch axis and convert to int64.
        # torch.from_numpy already yields a CPU tensor, so no Variable
        # wrapper or .cpu() call is needed.
        sequence = np.array(text_to_sequence(text, ['burmese_cleaners']))[None, :]
        sequence = torch.from_numpy(sequence).long()

        # Run the whole forward pass without gradient tracking; only the
        # post-net mel output is needed downstream.
        with torch.no_grad():
            _, mel_outputs_postnet, _, _ = model.inference(sequence)
            waveform = generate_speech(mel_outputs_postnet, hparams)

        # Save and return the audio
        output_path = "output.wav"
        scipy.io.wavfile.write(output_path, hparams.sampling_rate, waveform)

        return output_path, "Speech generated successfully!"

    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return None, f"Error: {str(e)}\n\nMake sure you have uploaded the model files to the {MODEL_DIR} directory."

# Function for the Gradio interface
def tts_interface(text):
    """Gradio callback: validate the input text, then synthesize speech.

    Args:
        text: Contents of the input textbox. ``None`` is treated the same
            as empty text instead of raising ``AttributeError``.

    Returns:
        ``(None, "Please enter some text.")`` when ``text`` is ``None``,
        empty, or whitespace-only; otherwise the
        ``(audio_path, status_message)`` tuple from ``synthesize_speech``.
    """
    if text is None or not text.strip():
        return None, "Please enter some text."

    return synthesize_speech(text)

# Set up the environment once at import time and print the resulting status
# log returned by setup_environment().
setup_message = setup_environment()
print(setup_message)

# Create the Gradio interface: a single text input wired to tts_interface,
# whose two return values feed the Audio and Textbox outputs, in that order.
demo = gr.Interface(
    fn=tts_interface,
    inputs=[
        gr.Textbox(
            lines=3, 
            placeholder="Enter Burmese text here...", 
            label="Text"
        )
    ],
    # Order must match the (audio, status) tuple returned by tts_interface.
    outputs=[
        gr.Audio(label="Generated Speech"),
        gr.Textbox(label="Status")
    ],
    title="Myanmar (Burmese) Text-to-Speech",
    description="""
    This is a demo of the Myanmar Text-to-Speech system developed by hpbyte.
    Enter Burmese text in the box below and click 'Submit' to generate speech.
    
    **Note:** You need to upload the model files to the 'trained_model' directory:
    - checkpoint_latest.pth.tar
    - hparams.yml
    
    GitHub Repository: https://github.com/hpbyte/myanmar-tts
    """,
    # Each example is a one-element list because the interface has one input.
    examples=[
        ["မင်္ဂလာပါ"],
        ["မြန်မာစကားပြောစနစ်ကို ကြိုဆိုပါတယ်"],
        ["ဒီစနစ်ဟာ မြန်မာစာကို အသံအဖြစ် ပြောင်းပေးနိုင်ပါတယ်"],
    ]
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()