Spaces:

aungkomyat
/

mmtts

Configuration error

App Files Files Community

mmtts / app.py

aungkomyat

Update app.py

f1d74e2 verified 7 months ago

raw

history blame

4.51 kB

	import os
	import sys
	import gradio as gr
	import numpy as np
	import torch
	import subprocess
	import shutil
	from pathlib import Path

	# Model repository information
	# Git URL of the upstream TTS project; passed to `git clone` in setup_environment().
	REPO_URL = "https://github.com/hpbyte/myanmar-tts.git"
	# Local directory that must contain checkpoint_latest.pth.tar and hparams.yml;
	# created by setup_environment() if missing and read by synthesize_speech().
	MODEL_DIR = "trained_model"
	# Local directory of the cloned repository; its existence decides whether a
	# clone is needed, and it is appended to sys.path so the repo can be imported.
	REPO_DIR = "myanmar-tts"

	# Clone the model repository if needed, add it to sys.path, and create the model directory
	def setup_environment():
	    """Prepare the runtime environment for the TTS demo.

	    Performs three steps, each only when needed:

	    1. Clones ``REPO_URL`` into ``REPO_DIR`` if that directory does not exist.
	    2. Appends the absolute path of ``REPO_DIR`` to ``sys.path``.
	    3. Creates ``MODEL_DIR`` if it does not exist.

	    Returns:
	        str: A newline-separated log of the steps that were performed,
	        always ending with ``"Environment setup complete"``.

	    Raises:
	        subprocess.CalledProcessError: If ``git clone`` exits with a
	            non-zero status (``check=True``).
	        OSError: If the ``git`` executable cannot be started.
	    """
	    status_msg = ""

	    # Clone the repository if it doesn't exist
	    if not os.path.exists(REPO_DIR):
	        status_msg += "Cloning repository...\n"
	        # Pass the target directory explicitly so the checkout always lands
	        # in REPO_DIR instead of relying on the URL's basename matching it.
	        subprocess.run(["git", "clone", REPO_URL, REPO_DIR], check=True)

	    # Add the repository to Python path
	    repo_path = os.path.abspath(REPO_DIR)
	    if repo_path not in sys.path:
	        sys.path.append(repo_path)
	        status_msg += f"Added {repo_path} to Python path\n"

	    # Create model directory if it doesn't exist
	    if not os.path.exists(MODEL_DIR):
	        # exist_ok avoids a crash if the directory appears between the
	        # existence check and the creation.
	        os.makedirs(MODEL_DIR, exist_ok=True)
	        status_msg += f"Created {MODEL_DIR} directory\n"

	    return status_msg + "Environment setup complete"

	# Function to synthesize speech
	def synthesize_speech(text):
	    """Synthesize speech for ``text`` and write it to ``output.wav``.

	    The model code is imported lazily from the cloned repository, the
	    checkpoint and hyperparameters are loaded from ``MODEL_DIR`` on every
	    call, and inference runs on the CPU.

	    Args:
	        text: The text to convert to speech.

	    Returns:
	        tuple: ``(audio_path, status_message)``. ``audio_path`` is
	        ``"output.wav"`` on success and ``None`` when the model files are
	        missing or any exception occurs; ``status_message`` describes the
	        outcome. Exceptions are not propagated to the caller.
	    """
	    try:
	        # Import necessary modules from the repository.
	        # Guard the sys.path update so repeated calls do not keep appending
	        # the same entry, and use the absolute path for consistency with
	        # setup_environment().
	        repo_path = os.path.abspath(REPO_DIR)
	        if repo_path not in sys.path:
	            sys.path.append(repo_path)
	        from myanmar_tts.text import text_to_sequence
	        from myanmar_tts.utils.hparams import create_hparams
	        from myanmar_tts.train import load_model
	        from myanmar_tts.synthesis import generate_speech
	        import scipy.io.wavfile

	        # Check if model exists, if not provide instructions
	        checkpoint_path = os.path.join(MODEL_DIR, "checkpoint_latest.pth.tar")
	        config_path = os.path.join(MODEL_DIR, "hparams.yml")

	        if not (os.path.exists(checkpoint_path) and os.path.exists(config_path)):
	            return None, f"""Model files not found. Please upload:
	1. The checkpoint file at: {checkpoint_path}
	2. The hparams.yml file at: {config_path}

	You can obtain these files from the original repository or by training the model."""

	        # Load the model and hyperparameters
	        hparams = create_hparams(config_path)
	        model = load_model(hparams)
	        # NOTE(review): torch.load unpickles the checkpoint; only load
	        # checkpoint files from a trusted source.
	        checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
	        model.load_state_dict(checkpoint['state_dict'])
	        model.eval()

	        # Process text input: add a leading batch axis and convert to int64.
	        # torch.from_numpy already yields a CPU tensor, so the deprecated
	        # torch.autograd.Variable wrapper is not needed.
	        sequence = np.array(text_to_sequence(text, ['burmese_cleaners']))[None, :]
	        sequence = torch.from_numpy(sequence).long()

	        # Run the whole inference path without autograd tracking so no
	        # computation graph is built for the forward pass.
	        with torch.no_grad():
	            # Generate mel spectrograms (only the post-net output is used)
	            _, mel_outputs_postnet, _, _ = model.inference(sequence)

	            # Generate waveform
	            waveform = generate_speech(mel_outputs_postnet, hparams)

	        # Save and return the audio
	        output_path = "output.wav"
	        scipy.io.wavfile.write(output_path, hparams.sampling_rate, waveform)

	        return output_path, "Speech generated successfully!"

	    except Exception as e:
	        # UI boundary: report any failure as a status message instead of raising.
	        return None, f"Error: {e}\n\nMake sure you have uploaded the model files to the {MODEL_DIR} directory."

	# Function for the Gradio interface
	def tts_interface(text):
	    """Gradio callback: validate the input text, then synthesize speech.

	    Args:
	        text: The contents of the input textbox; may be ``None`` or empty.

	    Returns:
	        tuple: ``(audio_path, status_message)``. For ``None``, empty, or
	        whitespace-only input this is ``(None, "Please enter some text.")``;
	        otherwise it is whatever ``synthesize_speech(text)`` returns.
	    """
	    # Guard against None as well as blank strings, so a missing value
	    # produces the friendly prompt rather than an AttributeError on .strip().
	    if text is None or not text.strip():
	        return None, "Please enter some text."

	    return synthesize_speech(text)

	# Set up the environment
	# Prepare the repository checkout and model directory before building the UI,
	# and echo the resulting setup log to stdout.
	setup_message = setup_environment()
	print(setup_message)

	# UI components, built up front so the Interface call below stays short.
	_text_input = gr.Textbox(
	    lines=3,
	    placeholder="Enter Burmese text here...",
	    label="Text",
	)
	_audio_output = gr.Audio(label="Generated Speech")
	_status_output = gr.Textbox(label="Status")

	# Text shown under the title of the demo page.
	_DESCRIPTION = """
	This is a demo of the Myanmar Text-to-Speech system developed by hpbyte.
	Enter Burmese text in the box below and click 'Submit' to generate speech.

	Note: You need to upload the model files to the 'trained_model' directory:
	- checkpoint_latest.pth.tar
	- hparams.yml

	GitHub Repository: https://github.com/hpbyte/myanmar-tts
	"""

	# Sample inputs offered in the UI; one single-element row per example.
	_EXAMPLES = [
	    ["မင်္ဂလာပါ"],
	    ["မြန်မာစကားပြောစနစ်ကို ကြိုဆိုပါတယ်"],
	    ["ဒီစနစ်ဟာ မြန်မာစာကို အသံအဖြစ် ပြောင်းပေးနိုင်ပါတယ်"],
	]

	# Wire the callback to one text input and two outputs (audio + status text).
	demo = gr.Interface(
	    fn=tts_interface,
	    inputs=[_text_input],
	    outputs=[_audio_output, _status_output],
	    title="Myanmar (Burmese) Text-to-Speech",
	    description=_DESCRIPTION,
	    examples=_EXAMPLES,
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()