aungkomyat committed on
Commit
8d7439b
·
verified ·
1 Parent(s): b677e2c

Create app_simple.py

Browse files
Files changed (1) hide show
  1. app_simple.py +134 -0
app_simple.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+ import numpy as np
5
+ import subprocess
6
+ import scipy.io.wavfile
7
+ from pathlib import Path
8
+
9
# Upstream Myanmar TTS sources and the local directory they are cloned into.
REPO_URL = "https://github.com/hpbyte/myanmar-tts.git"
REPO_DIR = "myanmar-tts"


def setup():
    """Set up the environment by cloning the repository if needed.

    Clones ``REPO_URL`` into ``REPO_DIR`` when that directory does not exist,
    appends the checkout's absolute path to ``sys.path`` (only once), and
    makes sure the ``trained_model`` directory exists.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a
            non-zero status.
    """
    if not os.path.exists(REPO_DIR):
        print(f"Cloning {REPO_URL}...")
        # Name the destination explicitly so the checkout always lands in
        # REPO_DIR instead of whatever directory git derives from the URL.
        subprocess.run(["git", "clone", REPO_URL, REPO_DIR], check=True)

    # Add the repository to the Python path so its modules can be imported.
    repo_path = os.path.abspath(REPO_DIR)
    if repo_path not in sys.path:
        sys.path.append(repo_path)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("trained_model", exist_ok=True)
27
+
28
def text_to_speech(text):
    """Convert text to speech using Myanmar TTS.

    Two code paths are tried: the modules of the cloned repository
    (``text``, ``utils.hparams``, ``train``, ``synthesis``) and, if any of
    those imports fail, a ``myanmar_tts.synthesize`` wrapper.

    Args:
        text: The text to synthesize. ``None`` and whitespace-only strings
            are rejected with a prompt instead of being synthesized.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is
        ``"output.wav"`` on success and ``None`` when no audio was produced;
        ``status_message`` describes the outcome or the error.
    """
    # Reject None as well as blank text; calling .strip() on None would raise
    # outside the try block below and escape as an unhandled AttributeError.
    if text is None or not text.strip():
        return None, "Please enter some text."

    output_path = "output.wav"

    try:
        # Try to import the necessary modules from the cloned repository.
        try:
            import torch
            from text import text_to_sequence
            from utils.hparams import create_hparams
            from train import load_model
            from synthesis import generate_speech
        except ImportError:
            # If direct import fails, fall back to the simplified wrapper.
            # torch is still imported first, so a missing torch is reported
            # before a missing wrapper module.
            import torch
            from myanmar_tts import synthesize

            waveform, sample_rate = synthesize(text)
            scipy.io.wavfile.write(output_path, sample_rate, waveform)
            return output_path, "Speech generated successfully!"

        # Direct import worked: continue with the standard approach.
        checkpoint_path = os.path.join("trained_model", "checkpoint_latest.pth.tar")
        config_path = os.path.join("trained_model", "hparams.yml")

        if not (os.path.exists(checkpoint_path) and os.path.exists(config_path)):
            return None, (
                "Model files not found. Please upload:\n"
                f"1. The checkpoint file to: {checkpoint_path}\n"
                f"2. The hparams.yml file to: {config_path}\n"
                "\n"
                "You can obtain these files from the original repository."
            )

        # Load model and hyperparameters.
        hparams = create_hparams(config_path)
        model = load_model(hparams)
        # NOTE(review): torch.load unpickles the checkpoint, so only files
        # from a trusted source should be placed in 'trained_model'.
        checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['state_dict'])
        model.eval()

        # Encode the text as a batch of one integer sequence on the CPU.
        sequence = np.array(text_to_sequence(text, ['burmese_cleaners']))[None, :]
        sequence = torch.from_numpy(sequence).long()

        # Run both inference and waveform generation without autograd
        # tracking; no gradients are needed when only synthesizing.
        with torch.no_grad():
            _, mel_outputs_postnet, _, _ = model.inference(sequence)
            waveform = generate_speech(mel_outputs_postnet, hparams)

        # Save and return the audio.
        scipy.io.wavfile.write(output_path, hparams.sampling_rate, waveform)
        return output_path, "Speech generated successfully!"

    except Exception as e:
        # UI boundary: report the failure in the status box rather than
        # letting the exception propagate to the caller.
        error_msg = str(e)
        detailed_msg = (
            f"Error: {error_msg}\n"
            "\n"
            "Make sure you have:\n"
            "1. Uploaded the model files to the 'trained_model' directory\n"
            "2. The files are correctly named 'checkpoint_latest.pth.tar' and 'hparams.yml'\n"
            "\n"
            "If you're still seeing this error, please check the repository "
            "for any specific setup instructions."
        )
        return None, detailed_msg
97
+
98
# Prepare the checkout, import path and model directory before building the UI.
setup()


def _build_demo():
    """Assemble the Gradio interface that wraps ``text_to_speech``."""
    text_input = gr.Textbox(
        lines=3,
        placeholder="Enter Burmese text here...",
        label="Text",
    )
    audio_output = gr.Audio(label="Generated Speech")
    status_output = gr.Textbox(label="Status", max_lines=10)

    description = (
        "\n"
        "This is a demo of the Myanmar Text-to-Speech system developed by hpbyte.\n"
        "Enter Burmese text in the box below and click 'Submit' to generate speech.\n"
        "\n"
        "**Important**: You need to upload the model files to the 'trained_model' directory:\n"
        "- checkpoint_latest.pth.tar (the model checkpoint)\n"
        "- hparams.yml (hyperparameters configuration)\n"
        "\n"
        "Source: [GitHub Repository](https://github.com/hpbyte/myanmar-tts)\n"
    )

    # One single-field row per example sentence shown under the input box.
    sample_sentences = [
        ["မင်္ဂလာပါ"],
        ["မြန်မာစကားပြောစနစ်ကို ကြိုဆိုပါတယ်"],
        ["ဒီစနစ်ဟာ မြန်မာစာကို အသံအဖြစ် ပြောင်းပေးနိုင်ပါတယ်"],
    ]

    return gr.Interface(
        fn=text_to_speech,
        inputs=[text_input],
        outputs=[audio_output, status_output],
        title="Myanmar (Burmese) Text-to-Speech",
        description=description,
        examples=sample_sentences,
    )


# Create Gradio interface
demo = _build_demo()

if __name__ == "__main__":
    demo.launch()