Spaces:
Configuration error
Configuration error
File size: 4,509 Bytes
20b1fe0 f1d74e2 c5e2260 20b1fe0 f1d74e2 c5e2260 f1d74e2 20b1fe0 f1d74e2 c5e2260 f1d74e2 20b1fe0 f1d74e2 20b1fe0 f1d74e2 c5e2260 f1d74e2 20b1fe0 f1d74e2 20b1fe0 f1d74e2 20b1fe0 f1d74e2 20b1fe0 f1d74e2 20b1fe0 f1d74e2 20b1fe0 f1d74e2 20b1fe0 f1d74e2 20b1fe0 f1d74e2 20b1fe0 f1d74e2 c5e2260 f1d74e2 20b1fe0 f1d74e2 c5e2260 f1d74e2 c5e2260 f1d74e2 20b1fe0 c5e2260 20b1fe0 f1d74e2 20b1fe0 c5e2260 20b1fe0 c5e2260 20b1fe0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import os
import sys
import gradio as gr
import numpy as np
import torch
import subprocess
import shutil
from pathlib import Path
# Local directory names and the remote source of the TTS code
MODEL_DIR = "trained_model"  # checkpoint and hparams files are looked up here
REPO_DIR = "myanmar-tts"  # local checkout directory of the TTS repository
REPO_URL = "https://github.com/hpbyte/myanmar-tts.git"  # upstream repository
# Fetch the TTS repository and prepare the local directories it needs
def setup_environment():
    """Clone the TTS repository if needed and prepare local directories.

    Steps performed, in order:
      1. Clone ``REPO_URL`` into ``REPO_DIR`` when that directory is absent.
      2. Append the absolute path of ``REPO_DIR`` to ``sys.path`` if it is
         not already listed.
      3. Create ``MODEL_DIR`` when it does not exist.

    Returns:
        str: A newline-separated log of the actions that were taken, always
        ending with ``"Environment setup complete"``.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a
            non-zero status (``check=True``).
        OSError: If the ``git`` executable cannot be started or the model
            directory cannot be created.
    """
    steps = []

    # Clone the repository if it doesn't exist
    if not os.path.exists(REPO_DIR):
        steps.append("Cloning repository...\n")
        # Pass the destination explicitly: without it git derives the
        # directory name from the URL, which only matches REPO_DIR by
        # coincidence and would re-clone forever if the two ever diverged.
        subprocess.run(["git", "clone", REPO_URL, REPO_DIR], check=True)

    # Add the repository to Python path (absolute, and only once)
    repo_path = os.path.abspath(REPO_DIR)
    if repo_path not in sys.path:
        sys.path.append(repo_path)
        steps.append(f"Added {repo_path} to Python path\n")

    # Create model directory if it doesn't exist
    if not os.path.exists(MODEL_DIR):
        # exist_ok guards against another process creating the directory
        # between the check above and this call.
        os.makedirs(MODEL_DIR, exist_ok=True)
        steps.append(f"Created {MODEL_DIR} directory\n")

    steps.append("Environment setup complete")
    return "".join(steps)
# Function to synthesize speech
def synthesize_speech(text):
    """Synthesize speech for ``text`` and write it to a WAV file.

    Loads the checkpoint and hyperparameters from ``MODEL_DIR`` on every
    call, runs the model on the cleaned text sequence, and writes the
    resulting waveform to a fresh temporary ``.wav`` file.

    Args:
        text: The text to synthesize; it is passed to ``text_to_sequence``
            with the ``burmese_cleaners`` cleaner.

    Returns:
        tuple: ``(audio_path, status)``. ``audio_path`` is the path of the
        generated WAV file, or ``None`` when the model files are missing or
        an error occurred. ``status`` is a human-readable message.

    Any ``Exception`` raised while importing, loading, or running the model
    is caught and reported through ``status`` instead of propagating.
    """
    try:
        # Make the cloned repository importable. Use the absolute path and
        # add it only once so repeated requests do not grow sys.path.
        repo_path = os.path.abspath(REPO_DIR)
        if repo_path not in sys.path:
            sys.path.append(repo_path)

        # Import necessary modules from the repository.
        # NOTE(review): these module paths are assumed to match the layout
        # of the cloned repository -- confirm against the checkout.
        from myanmar_tts.text import text_to_sequence
        from myanmar_tts.utils.hparams import create_hparams
        from myanmar_tts.train import load_model
        from myanmar_tts.synthesis import generate_speech
        import scipy.io.wavfile
        import tempfile

        # Check if model exists, if not provide instructions
        checkpoint_path = os.path.join(MODEL_DIR, "checkpoint_latest.pth.tar")
        config_path = os.path.join(MODEL_DIR, "hparams.yml")
        if not (os.path.exists(checkpoint_path) and os.path.exists(config_path)):
            return None, (
                "Model files not found. Please upload:\n"
                f"1. The checkpoint file at: {checkpoint_path}\n"
                f"2. The hparams.yml file at: {config_path}\n"
                "You can obtain these files from the original repository "
                "or by training the model."
            )

        # Load the model and hyperparameters
        hparams = create_hparams(config_path)
        model = load_model(hparams)
        # SECURITY: torch.load unpickles the checkpoint, which can execute
        # arbitrary code. Only place checkpoints from a trusted source in
        # MODEL_DIR.
        checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['state_dict'])
        model.eval()

        # Process text input: add a leading batch axis and convert to int64.
        sequence = np.array(text_to_sequence(text, ['burmese_cleaners']))[None, :]
        sequence = torch.from_numpy(sequence).cpu().long()

        # Run the whole inference path without autograd bookkeeping; the
        # gradients are never used and tracking them only costs memory.
        with torch.no_grad():
            # Generate mel spectrograms (only the post-net output is used)
            _, mel_outputs_postnet, _, _ = model.inference(sequence)
            # Generate waveform
            waveform = generate_speech(mel_outputs_postnet, hparams)

        # Save to a unique file so concurrent requests cannot overwrite
        # each other's audio before it is returned to the client.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
            output_path = wav_file.name
        scipy.io.wavfile.write(output_path, hparams.sampling_rate, waveform)
        return output_path, "Speech generated successfully!"
    except Exception as e:
        return None, f"Error: {str(e)}\n\nMake sure you have uploaded the model files to the {MODEL_DIR} directory."
# Function for the Gradio interface
def tts_interface(text):
    """Validate the textbox input and delegate to ``synthesize_speech``.

    Args:
        text: The submitted text. ``None`` is treated the same as an empty
            string so a missing value yields a prompt instead of an
            ``AttributeError``.

    Returns:
        tuple: ``(None, "Please enter some text.")`` when ``text`` is
        ``None``, empty, or whitespace-only; otherwise whatever
        ``synthesize_speech(text)`` returns.
    """
    if text is None or not text.strip():
        return None, "Please enter some text."
    return synthesize_speech(text)
# Set up the environment.
# This runs at module import time (it is not under the __main__ guard), so
# setup_environment() is executed before the Gradio interface below is built.
# The returned status log is printed to stdout.
setup_message = setup_environment()
print(setup_message)
# Build the Gradio UI: one text input wired to tts_interface, which yields
# an audio clip plus a status message.
demo = gr.Interface(
    fn=tts_interface,
    # Single multi-line textbox for the text to be spoken
    inputs=[
        gr.Textbox(
            lines=3,
            placeholder="Enter Burmese text here...",
            label="Text",
        ),
    ],
    # Outputs map positionally onto the (audio_path, status) tuple
    outputs=[
        gr.Audio(label="Generated Speech"),
        gr.Textbox(label="Status"),
    ],
    title="Myanmar (Burmese) Text-to-Speech",
    description="""
This is a demo of the Myanmar Text-to-Speech system developed by hpbyte.
Enter Burmese text in the box below and click 'Submit' to generate speech.
**Note:** You need to upload the model files to the 'trained_model' directory:
- checkpoint_latest.pth.tar
- hparams.yml
GitHub Repository: https://github.com/hpbyte/myanmar-tts
""",
    # Sample inputs offered to the user; one textbox value per row
    examples=[
        ["မင်္ဂလာပါ"],
        ["မြန်မာစကားပြောစနစ်ကို ကြိုဆိုပါတယ်"],
        ["ဒီစနစ်ဟာ မြန်မာစာကို အသံအဖြစ် ပြောင်းပေးနိုင်ပါတယ်"],
    ],
)
# Launch the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    demo.launch()