aungkomyat committed on
Commit
3676b66
·
verified ·
1 Parent(s): 209da9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -114
app.py CHANGED
@@ -1,129 +1,65 @@
1
  import os
2
- import sys
3
  import gradio as gr
4
- import numpy as np
5
- import torch
6
- import subprocess
7
- import shutil
8
- from pathlib import Path
9
 
10
- # Model repository information
11
- REPO_URL = "https://github.com/hpbyte/myanmar-tts.git"
12
- MODEL_DIR = "trained_model"
13
- REPO_DIR = "myanmar-tts"
14
 
15
- # Check and install the package if not already installed
16
- def setup_environment():
17
- status_msg = ""
18
 
19
- # Clone the repository if it doesn't exist
20
- if not os.path.exists(REPO_DIR):
21
- status_msg += "Cloning repository...\n"
22
- subprocess.run(["git", "clone", REPO_URL], check=True)
23
-
24
- # Add the repository to Python path
25
- repo_path = os.path.abspath(REPO_DIR)
26
- if repo_path not in sys.path:
27
- sys.path.append(repo_path)
28
- status_msg += f"Added {repo_path} to Python path\n"
29
-
30
- # Create model directory if it doesn't exist
31
- if not os.path.exists(MODEL_DIR):
32
- os.makedirs(MODEL_DIR)
33
- status_msg += f"Created {MODEL_DIR} directory\n"
34
-
35
- return status_msg + "Environment setup complete"
36
 
37
- # Function to synthesize speech
38
- def synthesize_speech(text):
39
  try:
40
- # Import necessary modules from the repository
41
- sys.path.append(REPO_DIR)
42
- from myanmar_tts.text import text_to_sequence
43
- from myanmar_tts.utils.hparams import create_hparams
44
- from myanmar_tts.train import load_model
45
- from myanmar_tts.synthesis import generate_speech
46
- import scipy.io.wavfile
47
-
48
- # Check if model exists, if not provide instructions
49
- checkpoint_path = os.path.join(MODEL_DIR, "checkpoint_latest.pth.tar")
50
- config_path = os.path.join(MODEL_DIR, "hparams.yml")
51
-
52
- if not os.path.exists(checkpoint_path) or not os.path.exists(config_path):
53
- return None, f"""Model files not found. Please upload:
54
- 1. The checkpoint file at: {checkpoint_path}
55
- 2. The hparams.yml file at: {config_path}
56
-
57
- You can obtain these files from the original repository or by training the model."""
58
-
59
- # Load the model and hyperparameters
60
- hparams = create_hparams(config_path)
61
- model = load_model(hparams)
62
- model.load_state_dict(torch.load(checkpoint_path, map_location=torch.device('cpu'))['state_dict'])
63
- model.eval()
64
-
65
- # Process text input
66
- sequence = np.array(text_to_sequence(text, ['burmese_cleaners']))[None, :]
67
- sequence = torch.autograd.Variable(torch.from_numpy(sequence)).cpu().long()
68
-
69
- # Generate mel spectrograms
70
- mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
71
-
72
- # Generate waveform
73
- with torch.no_grad():
74
- waveform = generate_speech(mel_outputs_postnet, hparams)
75
-
76
- # Save and return the audio
77
- output_path = "output.wav"
78
- scipy.io.wavfile.write(output_path, hparams.sampling_rate, waveform)
79
-
80
- return output_path, "Speech generated successfully!"
81
-
82
  except Exception as e:
83
- return None, f"Error: {str(e)}\n\nMake sure you have uploaded the model files to the {MODEL_DIR} directory."
 
84
 
85
- # Function for the Gradio interface
86
- def tts_interface(text):
87
- if not text.strip():
88
- return None, "Please enter some text."
89
 
90
- return synthesize_speech(text)
91
-
92
- # Set up the environment
93
- setup_message = setup_environment()
94
- print(setup_message)
95
-
96
- # Create the Gradio interface
97
- demo = gr.Interface(
98
- fn=tts_interface,
99
- inputs=[
100
- gr.Textbox(
101
- lines=3,
102
- placeholder="Enter Burmese text here...",
103
- label="Text"
104
- )
105
- ],
106
- outputs=[
107
- gr.Audio(label="Generated Speech"),
108
- gr.Textbox(label="Status")
109
- ],
110
- title="Myanmar (Burmese) Text-to-Speech",
111
- description="""
112
- This is a demo of the Myanmar Text-to-Speech system developed by hpbyte.
113
- Enter Burmese text in the box below and click 'Submit' to generate speech.
114
 
115
- **Note:** You need to upload the model files to the 'trained_model' directory:
116
- - checkpoint_latest.pth.tar
117
- - hparams.yml
 
 
118
 
119
- GitHub Repository: https://github.com/hpbyte/myanmar-tts
120
- """,
121
- examples=[
122
- ["မင်္ဂလာပါ"],
123
- ["မြန်မာစကားပြောစနစ်ကို ကြိုဆိုပါတယ်"],
124
- ["ဒီစနစ်ဟာ မြန်မာစာကို အသံအဖြစ် ပြောင်းပေးနိုင်ပါတယ်"],
125
- ]
126
- )
127
 
128
  # Launch the app
129
  if __name__ == "__main__":
 
1
  import os
2
+ from typing import Optional
3
  import gradio as gr
4
+ from synthesis import synthesize_text
 
 
 
 
5
 
6
+ # Configure correct paths
7
+ MODEL_DIR = os.path.join(os.path.dirname(__file__), "trained_model")
8
+ MODEL_PATH = os.path.join(MODEL_DIR, "checkpoint_latest.pth.tar")
9
+ HPARAMS_PATH = os.path.join(MODEL_DIR, "hparams.yml")
10
 
11
+ # Check if model files exist
12
+ if not os.path.exists(MODEL_PATH):
13
+ print(f"ERROR: Model checkpoint not found at {MODEL_PATH}")
14
 
15
+ if not os.path.exists(HPARAMS_PATH):
16
+ print(f"ERROR: Hyperparameters file not found at {HPARAMS_PATH}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
+ # Synthesis function wrapper
19
+ def generate_speech(text: str) -> Optional[str]:
20
  try:
21
+ if not text.strip():
22
+ return None
23
+
24
+ # Pass correct paths to the synthesis function
25
+ audio_output = synthesize_text(
26
+ text,
27
+ checkpoint_path=MODEL_PATH,
28
+ hparams_path=HPARAMS_PATH
29
+ )
30
+ return audio_output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  except Exception as e:
32
+ print(f"Error generating speech: {str(e)}")
33
+ return None
34
 
35
+ # Gradio interface
36
+ with gr.Blocks() as demo:
37
+ gr.Markdown("# Myanmar Text-to-Speech Demo")
 
38
 
39
+ with gr.Row():
40
+ with gr.Column():
41
+ text_input = gr.Textbox(
42
+ label="Enter Myanmar text",
43
+ placeholder="မြန်မာစာ ရိုက်ထည့်ပါ",
44
+ lines=3
45
+ )
46
+ submit_btn = gr.Button("Generate Speech")
47
+
48
+ with gr.Column():
49
+ audio_output = gr.Audio(label="Generated Speech")
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ submit_btn.click(
52
+ fn=generate_speech,
53
+ inputs=text_input,
54
+ outputs=audio_output
55
+ )
56
 
57
+ gr.Markdown("""
58
+ ## Example Phrases
59
+ - မင်္ဂလာပါ (Hello)
60
+ - မြန်မာစကားပြောစနစ်ကို ကြိုဆိုပါတယ် (Welcome to the Myanmar speech system)
61
+ - ဒီစနစ်ဟာ မြန်မာစာကို အသံအဖြစ် ပြောင်းပေးနိုင်ပါတယ် (This system can convert Myanmar text to speech)
62
+ """)
 
 
63
 
64
  # Launch the app
65
  if __name__ == "__main__":