"""
This is a simplified wrapper for myanmar-tts to handle import issues.
It's intended to make the HuggingFace Space deployment easier.
"""

import os
import sys
import importlib.util

# Add the repository directory to Python path if needed
REPO_DIR = "myanmar-tts"
if os.path.exists(REPO_DIR) and os.path.abspath(REPO_DIR) not in sys.path:
    sys.path.append(os.path.abspath(REPO_DIR))

# Try to import directly, or from the repository
try:
    # First attempt: direct imports
    from text import text_to_sequence
    from utils.hparams import create_hparams
    from train import load_model
    from synthesis import generate_speech
except ImportError:
    try:
        # Second attempt: repository imports
        from myanmar_tts.text import text_to_sequence
        from myanmar_tts.utils.hparams import create_hparams
        from myanmar_tts.train import load_model
        from myanmar_tts.synthesis import generate_speech
    except ImportError:
        # If still failing, try to load modules dynamically
        def load_module(module_name, file_path):
            if not os.path.exists(file_path):
                raise ImportError(f"Module file not found: {file_path}")
            
            spec = importlib.util.spec_from_file_location(module_name, file_path)
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)
            return module
        
        # Try to load critical modules
        try:
            text_module = load_module("text", os.path.join(REPO_DIR, "text", "__init__.py"))
            text_to_sequence = text_module.text_to_sequence
            
            hparams_module = load_module("hparams", os.path.join(REPO_DIR, "utils", "hparams.py"))
            create_hparams = hparams_module.create_hparams
            
            train_module = load_module("train", os.path.join(REPO_DIR, "train.py"))
            load_model = train_module.load_model
            
            synthesis_module = load_module("synthesis", os.path.join(REPO_DIR, "synthesis.py"))
            generate_speech = synthesis_module.generate_speech
        except Exception as e:
            print(f"Failed to import myanmar-tts modules: {str(e)}")
            raise

# Define a simple synthesis function
def synthesize(text, model_dir="trained_model"):
    """
    Synthesize speech from the given text using the Myanmar TTS model.
    
    Args:
        text (str): The Burmese text to synthesize
        model_dir (str): Directory containing the model files
        
    Returns:
        tuple: (waveform, sample_rate)
    """
    import torch
    import numpy as np
    
    checkpoint_path = os.path.join(model_dir, "checkpoint_latest.pth.tar")
    config_path = os.path.join(model_dir, "hparams.yml")
    
    if not os.path.exists(checkpoint_path) or not os.path.exists(config_path):
        raise FileNotFoundError(f"Model files not found in {model_dir}")
    
    # Load the model
    hparams = create_hparams(config_path)
    model = load_model(hparams)
    checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    
    # Process text
    sequence = np.array(text_to_sequence(text, ['burmese_cleaners']))[None, :]
    sequence = torch.from_numpy(sequence).cpu().long()
    
    # Run the acoustic model and the vocoder without tracking gradients
    with torch.no_grad():
        # Generate mel spectrograms
        mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
        # Generate the waveform
        waveform = generate_speech(mel_outputs_postnet, hparams)
    
    return waveform, hparams.sampling_rate
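
# Minimal usage sketch, not part of the original wrapper. It assumes a trained
# checkpoint and hparams.yml live in "trained_model", that generate_speech
# returns a NumPy waveform, and that the soundfile package is installed for
# writing the WAV file; adjust the paths and sample text as needed.
if __name__ == "__main__":
    import soundfile as sf

    sample_text = "မင်္ဂလာပါ"  # example Burmese greeting
    waveform, sample_rate = synthesize(sample_text, model_dir="trained_model")
    sf.write("output.wav", waveform, sample_rate)
    print(f"Wrote output.wav at {sample_rate} Hz")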