longtq committed on
Commit
2bf87e7
·
1 Parent(s): c393370

Fix build error

Browse files
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.10
.vscode/shortcuts.json ADDED
@@ -0,0 +1 @@
 
 
1
+ []
Dockerfile CHANGED
@@ -43,7 +43,7 @@ RUN pyenv install ${PYTHON_VERSION} && \
43
  pyenv rehash && \
44
  pip install --no-cache-dir -U pip setuptools wheel
45
 
46
- RUN pip install --no-cache-dir -U torch==1.12.1 torchvision==0.13.1
47
  COPY --chown=1000 requirements.txt /tmp/requirements.txt
48
  RUN pip install --no-cache-dir -U -r /tmp/requirements.txt
49
 
@@ -56,4 +56,4 @@ ENV PYTHONPATH=${HOME}/app \
56
  GRADIO_SERVER_NAME=0.0.0.0 \
57
  GRADIO_THEME=huggingface \
58
  SYSTEM=spaces
59
- CMD ["python", "app.py"]
 
43
  pyenv rehash && \
44
  pip install --no-cache-dir -U pip setuptools wheel
45
 
46
+ RUN pip install --no-cache-dir -U torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0
47
  COPY --chown=1000 requirements.txt /tmp/requirements.txt
48
  RUN pip install --no-cache-dir -U -r /tmp/requirements.txt
49
 
 
56
  GRADIO_SERVER_NAME=0.0.0.0 \
57
  GRADIO_THEME=huggingface \
58
  SYSTEM=spaces
59
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -10,5 +10,16 @@ pinned: false
10
  license: mit
11
  ---
12
 
 
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
10
  license: mit
11
  ---
12
 
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
 
15
+ Local usage:
16
+ python=3.10
17
+
18
+ ```
19
+ pip install torch==2.4.0+cu124 torchaudio==2.4.0+cu124 --extra-index-url https://download.pytorch.org/whl/cu124
20
+ ```
21
+
22
+ ```
23
+ sudo apt-get update
24
+ sudo apt-get install sox ffmpeg
25
+ ```
app.py CHANGED
@@ -32,215 +32,81 @@ def download_model():
32
  REPO_ID = 'vinthony/SadTalker-V002rc'
33
  snapshot_download(repo_id=REPO_ID, local_dir='./checkpoints', local_dir_use_symlinks=True)
34
 
35
- def sadtalker_demo():
36
 
37
- download_model()
 
38
 
 
 
 
 
 
 
 
39
  sad_talker = SadTalker(lazy_load=True)
40
- # tts_talker = TTSTalker()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
 
 
42
  with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
43
  gr.Markdown("""
44
- # 🎤 F5-TTS: Vietnamese Text-to-Speech Synthesis.
45
- # The model was trained with approximately 1000 hours of data on a RTX 3090 GPU.
46
- Enter text and upload a sample voice to generate natural speech.
47
  """)
48
-
49
  with gr.Row():
50
  ref_audio = gr.Audio(label="🔊 Sample Voice", type="filepath")
 
51
  gen_text = gr.Textbox(label="📝 Text", placeholder="Enter the text to generate voice...", lines=3)
52
-
53
  speed = gr.Slider(0.3, 2.0, value=1.0, step=0.1, label="⚡ Speed")
54
- btn_synthesize = gr.Button("🔥 Generate Voice")
55
-
56
  with gr.Row():
57
- driven_audio = gr.Audio(label="🎧 Generated Audio", type="numpy")
58
- output_spectrogram = gr.Image(label="📊 Spectrogram")
59
-
60
-
61
- btn_synthesize.click(infer_tts, inputs=[ref_audio, gen_text, speed], outputs=[driven_audio, output_spectrogram])
62
-
63
-
64
-
65
- gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
66
- <a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
67
- <a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \
68
- <a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </div>")
69
-
70
-
71
- gr.Markdown("""
72
- <b>You may duplicate the space and upgrade to GPU in settings for better performance and faster inference without waiting in the queue. <a style='display:inline-block' href="https://huggingface.co/spaces/vinthony/SadTalker?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></b> \
73
- <br/><b>Alternatively, try our GitHub <a href=https://github.com/Winfredy/SadTalker> code </a> on your own GPU. </b> <a style='display:inline-block' href="https://github.com/Winfredy/SadTalker"><img src="https://img.shields.io/github/stars/Winfredy/SadTalker?style=social"/></a> \
74
- """)
75
-
76
- with gr.Row(): #.style(equal_height=False):
77
- with gr.Column(variant='panel'):
78
- with gr.Tabs(elem_id="sadtalker_source_image"):
79
- with gr.TabItem('Source image'):
80
- with gr.Row():
81
- source_image = gr.Image(label="Source image", source="upload", type="filepath", elem_id="img2img_image") # .style(width=512)
82
-
83
-
84
- with gr.Tabs(elem_id="sadtalker_driven_audio"):
85
- with gr.TabItem('Driving Methods'):
86
- gr.Markdown("Possible driving combinations: <br> 1. Audio only 2. Audio/IDLE Mode + Ref Video(pose, blink, pose+blink) 3. IDLE Mode only 4. Ref Video only (all) ")
87
-
88
- with gr.Row():
89
- # driven_audio = gr.Audio(label="Input audio", source="upload", type="filepath", max_length=180) # 180s
90
- driven_audio_no = gr.Audio(label="Use IDLE mode, no audio is required", source="upload", type="filepath", visible=False)
91
-
92
- with gr.Column():
93
- use_idle_mode = gr.Checkbox(label="Use Idle Animation")
94
- length_of_audio = gr.Number(value=5, label="The length(seconds) of the generated video.")
95
- use_idle_mode.change(toggle_audio_file, inputs=use_idle_mode, outputs=[driven_audio, driven_audio_no]) # todo
96
-
97
- with gr.Row():
98
- ref_video = gr.Video(label="Reference Video", source="upload", type="filepath", elem_id="vidref") # .style(width=512)
99
-
100
- with gr.Column():
101
- use_ref_video = gr.Checkbox(label="Use Reference Video")
102
- ref_info = gr.Radio(['pose', 'blink','pose+blink', 'all'], value='pose', label='Reference Video',info="How to borrow from reference Video?((fully transfer, aka, video driving mode))")
103
-
104
- ref_video.change(ref_video_fn, inputs=ref_video, outputs=[use_ref_video]) # todo
105
-
106
-
107
- with gr.Column(variant='panel'):
108
- with gr.Tabs(elem_id="sadtalker_checkbox"):
109
- with gr.TabItem('Settings'):
110
- gr.Markdown("need help? please visit our [[best practice page](https://github.com/OpenTalker/SadTalker/blob/main/docs/best_practice.md)] for more detials")
111
- with gr.Column(variant='panel'):
112
- # width = gr.Slider(minimum=64, elem_id="img2img_width", maximum=2048, step=8, label="Manually Crop Width", value=512) # img2img_width
113
- # height = gr.Slider(minimum=64, elem_id="img2img_height", maximum=2048, step=8, label="Manually Crop Height", value=512) # img2img_width
114
- with gr.Row():
115
- pose_style = gr.Slider(minimum=0, maximum=45, step=1, label="Pose style", value=0) #
116
- exp_weight = gr.Slider(minimum=0, maximum=3, step=0.1, label="expression scale", value=1) #
117
- blink_every = gr.Checkbox(label="use eye blink", value=True)
118
-
119
- with gr.Row():
120
- size_of_image = gr.Radio([256, 512], value=256, label='face model resolution', info="use 256/512 model?") #
121
- preprocess_type = gr.Radio(['crop', 'resize','full', 'extcrop', 'extfull'], value='crop', label='preprocess', info="How to handle input image?")
122
-
123
- with gr.Row():
124
- is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion, works with preprocess `full`)")
125
- facerender = gr.Radio(['facevid2vid','pirender'], value='facevid2vid', label='facerender', info="which face render?")
126
-
127
- with gr.Row():
128
- batch_size = gr.Slider(label="batch size in generation", step=1, maximum=10, value=1)
129
- enhancer = gr.Checkbox(label="GFPGAN as Face enhancer")
130
-
131
- submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
132
-
133
- with gr.Tabs(elem_id="sadtalker_genearted"):
134
- gen_video = gr.Video(label="Generated video", format="mp4", scale=1) # .style(width=256)
135
-
136
-
137
-
138
- submit.click(
139
- fn=sad_talker.test,
140
- inputs=[source_image,
141
- driven_audio,
142
- preprocess_type,
143
- is_still_mode,
144
- enhancer,
145
- batch_size,
146
- size_of_image,
147
- pose_style,
148
- facerender,
149
- exp_weight,
150
- use_ref_video,
151
- ref_video,
152
- ref_info,
153
- use_idle_mode,
154
- length_of_audio,
155
- blink_every
156
- ],
157
- outputs=[gen_video],
158
- )
159
-
160
  with gr.Row():
161
- examples = [
162
- [
163
- 'examples/source_image/full_body_1.png',
164
- 'examples/driven_audio/bus_chinese.wav',
165
- 'crop',
166
- True,
167
- False
168
- ],
169
- [
170
- 'examples/source_image/full_body_2.png',
171
- 'examples/driven_audio/japanese.wav',
172
- 'crop',
173
- False,
174
- False
175
- ],
176
- [
177
- 'examples/source_image/full3.png',
178
- 'examples/driven_audio/deyu.wav',
179
- 'crop',
180
- False,
181
- True
182
- ],
183
- [
184
- 'examples/source_image/full4.jpeg',
185
- 'examples/driven_audio/eluosi.wav',
186
- 'full',
187
- False,
188
- True
189
- ],
190
- [
191
- 'examples/source_image/full4.jpeg',
192
- 'examples/driven_audio/imagine.wav',
193
- 'full',
194
- True,
195
- True
196
- ],
197
- [
198
- 'examples/source_image/full_body_1.png',
199
- 'examples/driven_audio/bus_chinese.wav',
200
- 'full',
201
- True,
202
- False
203
- ],
204
- [
205
- 'examples/source_image/art_13.png',
206
- 'examples/driven_audio/fayu.wav',
207
- 'resize',
208
- True,
209
- False
210
- ],
211
- [
212
- 'examples/source_image/art_5.png',
213
- 'examples/driven_audio/chinese_news.wav',
214
- 'resize',
215
- False,
216
- False
217
- ],
218
- [
219
- 'examples/source_image/art_5.png',
220
- 'examples/driven_audio/RD_Radio31_000.wav',
221
- 'resize',
222
- True,
223
- True
224
- ],
225
- ]
226
- gr.Examples(examples=examples,
227
- inputs=[
228
- source_image,
229
- driven_audio,
230
- preprocess_type,
231
- is_still_mode,
232
- enhancer],
233
- outputs=[gen_video],
234
- fn=sad_talker.test,
235
- cache_examples=os.getenv('SYSTEM') == 'spaces') #
236
-
237
  return sadtalker_interface
238
-
239
 
240
  if __name__ == "__main__":
241
-
242
  demo = sadtalker_demo()
243
  demo.queue(max_size=10, api_open=True)
244
- demo.launch(debug=True)
245
-
246
-
 
32
  REPO_ID = 'vinthony/SadTalker-V002rc'
33
  snapshot_download(repo_id=REPO_ID, local_dir='./checkpoints', local_dir_use_symlinks=True)
34
 
 
35
 
36
+ # New: Gộp 2 nút thành 1, output audio là input cho video
37
+ import soundfile as sf
38
 
39
+ def generate_voice_and_video(ref_audio, ref_text, gen_text, speed, source_image, preprocess_type, is_still_mode, enhancer, batch_size, size_of_image, pose_style, facerender, exp_weight, use_ref_video, ref_video, ref_info, use_idle_mode, length_of_audio, blink_every):
40
+ # 1. Sinh audio từ TTS
41
+ (final_sample_rate, final_wave), _ = infer_tts(ref_audio, ref_text, gen_text, speed)
42
+ # Lưu ra file tạm
43
+ tmp_audio = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
44
+ sf.write(tmp_audio.name, final_wave, final_sample_rate)
45
+ # 2. Gọi SadTalker với audio vừa sinh ra
46
  sad_talker = SadTalker(lazy_load=True)
47
+ video_path = sad_talker.test(
48
+ source_image,
49
+ tmp_audio.name,
50
+ preprocess_type,
51
+ is_still_mode,
52
+ enhancer,
53
+ batch_size,
54
+ size_of_image,
55
+ pose_style,
56
+ facerender,
57
+ exp_weight,
58
+ use_ref_video,
59
+ ref_video,
60
+ ref_info,
61
+ use_idle_mode,
62
+ length_of_audio,
63
+ blink_every
64
+ )
65
+ return tmp_audio.name, video_path
66
 
67
+ def sadtalker_demo():
68
+ download_model()
69
  with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
70
  gr.Markdown("""
71
+ # 🎤 F5-TTS: Vietnamese Text-to-Speech Synthesis & SadTalker Video
72
+ # Nhập text, upload sample voice ảnh để tạo video nói chuyện.
 
73
  """)
 
74
  with gr.Row():
75
  ref_audio = gr.Audio(label="🔊 Sample Voice", type="filepath")
76
+ ref_text = gr.Textbox(label="📝 Reference Transcript (optional)", placeholder="Nhập transcript tiếng Việt cho sample voice nếu có...", lines=2)
77
  gen_text = gr.Textbox(label="📝 Text", placeholder="Enter the text to generate voice...", lines=3)
 
78
  speed = gr.Slider(0.3, 2.0, value=1.0, step=0.1, label="⚡ Speed")
 
 
79
  with gr.Row():
80
+ source_image = gr.Image(label="Source image", type="filepath", elem_id="img2img_image")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  with gr.Row():
82
+ # Các setting cho SadTalker
83
+ with gr.Column():
84
+ preprocess_type = gr.Radio(['crop', 'resize','full', 'extcrop', 'extfull'], value='crop', label='preprocess', info="How to handle input image?")
85
+ is_still_mode = gr.Checkbox(label="Still Mode (fewer head motion, works with preprocess `full`)")
86
+ enhancer = gr.Checkbox(label="GFPGAN as Face enhancer")
87
+ batch_size = gr.Slider(label="batch size in generation", step=1, maximum=10, value=1)
88
+ size_of_image = gr.Radio([256, 512], value=256, label='face model resolution', info="use 256/512 model?")
89
+ pose_style = gr.Slider(minimum=0, maximum=45, step=1, label="Pose style", value=0)
90
+ facerender = gr.Radio(['facevid2vid','pirender'], value='facevid2vid', label='facerender', info="which face render?")
91
+ exp_weight = gr.Slider(minimum=0, maximum=3, step=0.1, label="expression scale", value=1)
92
+ use_ref_video = gr.Checkbox(label="Use Reference Video")
93
+ ref_video = gr.Video(label="Reference Video", elem_id="vidref")
94
+ ref_info = gr.Radio(['pose', 'blink','pose+blink', 'all'], value='pose', label='Reference Video',info="How to borrow from reference Video?((fully transfer, aka, video driving mode))")
95
+ use_idle_mode = gr.Checkbox(label="Use Idle Animation")
96
+ length_of_audio = gr.Number(value=5, label="The length(seconds) of the generated video.")
97
+ blink_every = gr.Checkbox(label="use eye blink", value=True)
98
+ btn_generate = gr.Button("🔥 Generate Voice & Video")
99
+ with gr.Row():
100
+ output_audio = gr.Audio(label="🎧 Generated Audio", type="filepath")
101
+ gen_video = gr.Video(label="Generated video", format="mp4", scale=1)
102
+ btn_generate.click(
103
+ generate_voice_and_video,
104
+ inputs=[ref_audio, ref_text, gen_text, speed, source_image, preprocess_type, is_still_mode, enhancer, batch_size, size_of_image, pose_style, facerender, exp_weight, use_ref_video, ref_video, ref_info, use_idle_mode, length_of_audio, blink_every],
105
+ outputs=[output_audio, gen_video]
106
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  return sadtalker_interface
 
108
 
109
  if __name__ == "__main__":
 
110
  demo = sadtalker_demo()
111
  demo.queue(max_size=10, api_open=True)
112
+ demo.launch(debug=True, server_name="0.0.0.0")
 
 
app_tts.py CHANGED
@@ -1,11 +1,12 @@
1
  import spaces
2
  import os
 
3
  from huggingface_hub import login
4
  import gradio as gr
5
  from cached_path import cached_path
6
  import tempfile
7
  from vinorm import TTSnorm
8
-
9
  from f5_tts.model import DiT
10
  from f5_tts.infer.utils_infer import (
11
  preprocess_ref_audio_text,
@@ -13,8 +14,24 @@ from f5_tts.infer.utils_infer import (
13
  load_model,
14
  infer_process,
15
  save_spectrogram,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  )
17
-
 
 
18
  # Retrieve token from secrets
19
  hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
20
 
@@ -35,18 +52,108 @@ def post_process(text):
35
  text = " " + text + " "
36
  text = text.replace('"', "")
37
  return " ".join(text.split())
38
-
39
  # Load models
40
- vocoder = load_vocoder()
41
- model = load_model(
42
- DiT,
43
- dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4),
44
- ckpt_path=str(cached_path("hf://hynt/F5-TTS-Vietnamese-ViVoice/model_last.pt")),
45
- vocab_file=str(cached_path("hf://hynt/F5-TTS-Vietnamese-ViVoice/config.json")),
46
- )
47
-
48
  @spaces.GPU
49
- def infer_tts(ref_audio_orig: str, gen_text: str, speed: float = 1.0, request: gr.Request = None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  if not ref_audio_orig:
52
  raise gr.Error("Please upload a sample audio file.")
@@ -54,39 +161,37 @@ def infer_tts(ref_audio_orig: str, gen_text: str, speed: float = 1.0, request: g
54
  raise gr.Error("Please enter the text content to generate voice.")
55
  if len(gen_text.split()) > 1000:
56
  raise gr.Error("Please enter text content with less than 1000 words.")
57
-
58
  try:
59
- ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, "")
 
 
60
  final_wave, final_sample_rate, spectrogram = infer_process(
61
- ref_audio, ref_text.lower(), post_process(TTSnorm(gen_text)).lower(), model, vocoder, speed=speed
62
  )
63
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_spectrogram:
64
  spectrogram_path = tmp_spectrogram.name
65
  save_spectrogram(spectrogram, spectrogram_path)
66
-
67
  return (final_sample_rate, final_wave), spectrogram_path
68
  except Exception as e:
69
  raise gr.Error(f"Error generating voice: {e}")
70
 
71
  # Gradio UI
 
72
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
73
  gr.Markdown("""
74
  # 🎤 F5-TTS: Vietnamese Text-to-Speech Synthesis.
75
  # The model was trained with approximately 1000 hours of data on a RTX 3090 GPU.
76
  Enter text and upload a sample voice to generate natural speech.
77
  """)
78
-
79
  with gr.Row():
80
  ref_audio = gr.Audio(label="🔊 Sample Voice", type="filepath")
 
81
  gen_text = gr.Textbox(label="📝 Text", placeholder="Enter the text to generate voice...", lines=3)
82
-
83
  speed = gr.Slider(0.3, 2.0, value=1.0, step=0.1, label="⚡ Speed")
84
  btn_synthesize = gr.Button("🔥 Generate Voice")
85
-
86
  with gr.Row():
87
  output_audio = gr.Audio(label="🎧 Generated Audio", type="numpy")
88
  output_spectrogram = gr.Image(label="📊 Spectrogram")
89
-
90
  model_limitations = gr.Textbox(
91
  value="""1. This model may not perform well with numerical characters, dates, special characters, etc. => A text normalization module is needed.
92
  2. The rhythm of some generated audios may be inconsistent or choppy => It is recommended to select clearly pronounced sample audios with minimal pauses for better synthesis quality.
@@ -96,8 +201,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
96
  lines=4,
97
  interactive=False
98
  )
99
-
100
- btn_synthesize.click(infer_tts, inputs=[ref_audio, gen_text, speed], outputs=[output_audio, output_spectrogram])
101
 
102
  # Run Gradio with share=True to get a gradio.live link
103
  # demo.queue().launch()
 
1
  import spaces
2
  import os
3
+ import codecs
4
  from huggingface_hub import login
5
  import gradio as gr
6
  from cached_path import cached_path
7
  import tempfile
8
  from vinorm import TTSnorm
9
+ from importlib.resources import files
10
  from f5_tts.model import DiT
11
  from f5_tts.infer.utils_infer import (
12
  preprocess_ref_audio_text,
 
14
  load_model,
15
  infer_process,
16
  save_spectrogram,
17
+ target_sample_rate as default_target_sample_rate,
18
+ n_mel_channels as default_n_mel_channels,
19
+ hop_length as default_hop_length,
20
+ win_length as default_win_length,
21
+ n_fft as default_n_fft,
22
+ mel_spec_type as default_mel_spec_type,
23
+ target_rms as default_target_rms,
24
+ cross_fade_duration as default_cross_fade_duration,
25
+ ode_method as default_ode_method,
26
+ nfe_step as default_nfe_step, # 16, 32
27
+ cfg_strength as default_cfg_strength,
28
+ sway_sampling_coef as default_sway_sampling_coef,
29
+ speed as default_speed,
30
+ fix_duration as default_fix_duration
31
  )
32
+ from pathlib import Path
33
+ from omegaconf import OmegaConf
34
+ from datetime import datetime
35
  # Retrieve token from secrets
36
  hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
37
 
 
52
  text = " " + text + " "
53
  text = text.replace('"', "")
54
  return " ".join(text.split())
 
55
  # Load models
 
 
 
 
 
 
 
 
56
  @spaces.GPU
57
+ def infer_tts(ref_audio_orig: str, ref_text_input: str, gen_text: str, speed: float = 1.0, request: gr.Request = None):
58
+
59
+ args = {
60
+ "model": "F5TTS_Base",
61
+ "ckpt_file": str(cached_path("hf://hynt/F5-TTS-Vietnamese-ViVoice/model_last.pt")),
62
+ "vocab_file": str(cached_path("hf://hynt/F5-TTS-Vietnamese-ViVoice/config.json")),
63
+ "ref_audio": ref_audio_orig,
64
+ "ref_text": ref_text_input,
65
+ "gen_text": gen_text,
66
+ "speed": speed
67
+ }
68
+ config = {} # tomli.load(open(args.config, "rb"))
69
+ # command-line interface parameters
70
+
71
+ model = args["model"] or config.get("model", "F5TTS_Base")
72
+ ckpt_file = args["ckpt_file"] or config.get("ckpt_file", "")
73
+ vocab_file = args["vocab_file"] or config.get("vocab_file", "")
74
+
75
+ ref_audio = args["ref_audio"] or config.get("ref_audio", "infer/examples/basic/basic_ref_en.wav")
76
+ ref_text = args["ref_text"] if args["ref_text"] is not None else config.get("ref_text", "Some call me nature, others call me mother nature.")
77
+ gen_text = args["gen_text"] or config.get("gen_text", "Here we generate something just for test.")
78
+ gen_file = args.get("gen_file", "") or config.get("gen_file", "")
79
+ output_dir = args.get("output_dir", "") or config.get("output_dir", "tests")
80
+ output_file = args.get("output_file", "") or config.get("output_file", f"infer_cli_{datetime.now().strftime(r'%Y%m%d_%H%M%S')}.wav")
81
+ save_chunk = args.get("save_chunk", False) or config.get("save_chunk", False)
82
+ remove_silence = args.get("remove_silence", False) or config.get("remove_silence", False)
83
+ load_vocoder_from_local = args.get("load_vocoder_from_local", False) or config.get("load_vocoder_from_local", False)
84
+ vocoder_name = args.get("vocoder_name", "") or config.get("vocoder_name", default_mel_spec_type)
85
+ target_rms = args.get("target_rms", None) or config.get("target_rms", default_target_rms)
86
+ cross_fade_duration = args.get("cross_fade_duration", None) or config.get("cross_fade_duration", default_cross_fade_duration)
87
+ nfe_step = args.get("nfe_step", None) or config.get("nfe_step", default_nfe_step)
88
+ cfg_strength = args.get("cfg_strength", None) or config.get("cfg_strength", default_cfg_strength)
89
+ sway_sampling_coef = args.get("sway_sampling_coef", None) or config.get("sway_sampling_coef", default_sway_sampling_coef)
90
+ speed = args.get("speed", None) or config.get("speed", default_speed)
91
+ fix_duration = args.get("fix_duration", None) or config.get("fix_duration", default_fix_duration)
92
+
93
+ if "infer/examples/" in ref_audio:
94
+ ref_audio = str(files("f5_tts").joinpath(f"{ref_audio}"))
95
+ if "infer/examples/" in gen_file:
96
+ gen_file = str(files("f5_tts").joinpath(f"{gen_file}"))
97
+ if "voices" in config:
98
+ for voice in config["voices"]:
99
+ voice_ref_audio = config["voices"][voice]["ref_audio"]
100
+ if "infer/examples/" in voice_ref_audio:
101
+ config["voices"][voice]["ref_audio"] = str(files("f5_tts").joinpath(f"{voice_ref_audio}"))
102
+
103
+
104
+ # ignore gen_text if gen_file provided
105
+
106
+ if gen_file:
107
+ gen_text = codecs.open(gen_file, "r", "utf-8").read()
108
+
109
+
110
+ # output path
111
+
112
+ wave_path = Path(output_dir) / output_file
113
+ # spectrogram_path = Path(output_dir) / "infer_cli_out.png"
114
+ if save_chunk:
115
+ output_chunk_dir = os.path.join(output_dir, f"{Path(output_file).stem}_chunks")
116
+ if not os.path.exists(output_chunk_dir):
117
+ os.makedirs(output_chunk_dir)
118
+
119
+ # load vocoder
120
+
121
+ if vocoder_name == "vocos":
122
+ vocoder_local_path = "../checkpoints/vocos-mel-24khz"
123
+ elif vocoder_name == "bigvgan":
124
+ vocoder_local_path = "../checkpoints/bigvgan_v2_24khz_100band_256x"
125
+
126
+ vocoder = load_vocoder(vocoder_name=vocoder_name, is_local=load_vocoder_from_local, local_path=vocoder_local_path)
127
+
128
+
129
+ # load TTS model
130
+
131
+ model_cfg = OmegaConf.load(
132
+ config.get("model_cfg", str(files("f5_tts").joinpath(f"configs/{model}.yaml")))
133
+ ).model
134
+ model_cls = globals()[model_cfg.backbone]
135
+
136
+ repo_name, ckpt_step, ckpt_type = "F5-TTS", 1250000, "safetensors"
137
+
138
+ if model != "F5TTS_Base":
139
+ assert vocoder_name == model_cfg.mel_spec.mel_spec_type
140
+
141
+ # override for previous models
142
+ if model == "F5TTS_Base":
143
+ if vocoder_name == "vocos":
144
+ ckpt_step = 1200000
145
+ elif vocoder_name == "bigvgan":
146
+ model = "F5TTS_Base_bigvgan"
147
+ ckpt_type = "pt"
148
+ elif model == "E2TTS_Base":
149
+ repo_name = "E2-TTS"
150
+ ckpt_step = 1200000
151
+
152
+ if not ckpt_file:
153
+ ckpt_file = str(cached_path(f"hf://SWivid/{repo_name}/{model}/model_{ckpt_step}.{ckpt_type}"))
154
+
155
+ print(f"Using {model}...")
156
+ ema_model = load_model(model_cls, model_cfg.arch, ckpt_file, mel_spec_type=vocoder_name, vocab_file=vocab_file)
157
 
158
  if not ref_audio_orig:
159
  raise gr.Error("Please upload a sample audio file.")
 
161
  raise gr.Error("Please enter the text content to generate voice.")
162
  if len(gen_text.split()) > 1000:
163
  raise gr.Error("Please enter text content with less than 1000 words.")
 
164
  try:
165
+ # Nếu người dùng nhập ref_text thì dùng, không thì để rỗng để tự động nhận diện
166
+ ref_audio, ref_text = preprocess_ref_audio_text(ref_audio_orig, ref_text_input or "")
167
+ gen_text_ = gen_text.strip()
168
  final_wave, final_sample_rate, spectrogram = infer_process(
169
+ ref_audio, ref_text.lower(), gen_text_, ema_model, vocoder, speed=speed
170
  )
171
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_spectrogram:
172
  spectrogram_path = tmp_spectrogram.name
173
  save_spectrogram(spectrogram, spectrogram_path)
 
174
  return (final_sample_rate, final_wave), spectrogram_path
175
  except Exception as e:
176
  raise gr.Error(f"Error generating voice: {e}")
177
 
178
  # Gradio UI
179
+
180
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
181
  gr.Markdown("""
182
  # 🎤 F5-TTS: Vietnamese Text-to-Speech Synthesis.
183
  # The model was trained with approximately 1000 hours of data on a RTX 3090 GPU.
184
  Enter text and upload a sample voice to generate natural speech.
185
  """)
 
186
  with gr.Row():
187
  ref_audio = gr.Audio(label="🔊 Sample Voice", type="filepath")
188
+ ref_text = gr.Textbox(label="📝 Reference Transcript (optional)", placeholder="Nhập transcript tiếng Việt cho sample voice nếu có...", lines=2)
189
  gen_text = gr.Textbox(label="📝 Text", placeholder="Enter the text to generate voice...", lines=3)
 
190
  speed = gr.Slider(0.3, 2.0, value=1.0, step=0.1, label="⚡ Speed")
191
  btn_synthesize = gr.Button("🔥 Generate Voice")
 
192
  with gr.Row():
193
  output_audio = gr.Audio(label="🎧 Generated Audio", type="numpy")
194
  output_spectrogram = gr.Image(label="📊 Spectrogram")
 
195
  model_limitations = gr.Textbox(
196
  value="""1. This model may not perform well with numerical characters, dates, special characters, etc. => A text normalization module is needed.
197
  2. The rhythm of some generated audios may be inconsistent or choppy => It is recommended to select clearly pronounced sample audios with minimal pauses for better synthesis quality.
 
201
  lines=4,
202
  interactive=False
203
  )
204
+ btn_synthesize.click(infer_tts, inputs=[ref_audio, ref_text, gen_text, speed], outputs=[output_audio, output_spectrogram])
 
205
 
206
  # Run Gradio with share=True to get a gradio.live link
207
  # demo.queue().launch()
examples/vocab.txt ADDED
@@ -0,0 +1,2566 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ !
3
+ "
4
+ #
5
+ $
6
+ %
7
+ &
8
+ '
9
+ (
10
+ )
11
+ *
12
+ +
13
+ ,
14
+ -
15
+ .
16
+ /
17
+ 0
18
+ 1
19
+ 2
20
+ 3
21
+ 4
22
+ 5
23
+ 6
24
+ 7
25
+ 8
26
+ 9
27
+ :
28
+ ;
29
+ =
30
+ >
31
+ ?
32
+ @
33
+ A
34
+ B
35
+ C
36
+ D
37
+ E
38
+ F
39
+ G
40
+ H
41
+ I
42
+ J
43
+ K
44
+ L
45
+ M
46
+ N
47
+ O
48
+ P
49
+ Q
50
+ R
51
+ S
52
+ T
53
+ U
54
+ V
55
+ W
56
+ X
57
+ Y
58
+ Z
59
+ [
60
+ \
61
+ ]
62
+ _
63
+ a
64
+ a1
65
+ ai1
66
+ ai2
67
+ ai3
68
+ ai4
69
+ an1
70
+ an3
71
+ an4
72
+ ang1
73
+ ang2
74
+ ang4
75
+ ao1
76
+ ao2
77
+ ao3
78
+ ao4
79
+ b
80
+ ba
81
+ ba1
82
+ ba2
83
+ ba3
84
+ ba4
85
+ bai1
86
+ bai2
87
+ bai3
88
+ bai4
89
+ ban1
90
+ ban2
91
+ ban3
92
+ ban4
93
+ bang1
94
+ bang2
95
+ bang3
96
+ bang4
97
+ bao1
98
+ bao2
99
+ bao3
100
+ bao4
101
+ bei
102
+ bei1
103
+ bei2
104
+ bei3
105
+ bei4
106
+ ben1
107
+ ben2
108
+ ben3
109
+ ben4
110
+ beng
111
+ beng1
112
+ beng2
113
+ beng3
114
+ beng4
115
+ bi1
116
+ bi2
117
+ bi3
118
+ bi4
119
+ bian1
120
+ bian2
121
+ bian3
122
+ bian4
123
+ biao1
124
+ biao2
125
+ biao3
126
+ bie1
127
+ bie2
128
+ bie3
129
+ bie4
130
+ bin1
131
+ bin4
132
+ bing1
133
+ bing2
134
+ bing3
135
+ bing4
136
+ bo
137
+ bo1
138
+ bo2
139
+ bo3
140
+ bo4
141
+ bu2
142
+ bu3
143
+ bu4
144
+ c
145
+ ca1
146
+ cai1
147
+ cai2
148
+ cai3
149
+ cai4
150
+ can1
151
+ can2
152
+ can3
153
+ can4
154
+ cang1
155
+ cang2
156
+ cao1
157
+ cao2
158
+ cao3
159
+ ce4
160
+ cen1
161
+ cen2
162
+ ceng1
163
+ ceng2
164
+ ceng4
165
+ cha1
166
+ cha2
167
+ cha3
168
+ cha4
169
+ chai1
170
+ chai2
171
+ chan1
172
+ chan2
173
+ chan3
174
+ chan4
175
+ chang1
176
+ chang2
177
+ chang3
178
+ chang4
179
+ chao1
180
+ chao2
181
+ chao3
182
+ che1
183
+ che2
184
+ che3
185
+ che4
186
+ chen1
187
+ chen2
188
+ chen3
189
+ chen4
190
+ cheng1
191
+ cheng2
192
+ cheng3
193
+ cheng4
194
+ chi1
195
+ chi2
196
+ chi3
197
+ chi4
198
+ chong1
199
+ chong2
200
+ chong3
201
+ chong4
202
+ chou1
203
+ chou2
204
+ chou3
205
+ chou4
206
+ chu1
207
+ chu2
208
+ chu3
209
+ chu4
210
+ chua1
211
+ chuai1
212
+ chuai2
213
+ chuai3
214
+ chuai4
215
+ chuan1
216
+ chuan2
217
+ chuan3
218
+ chuan4
219
+ chuang1
220
+ chuang2
221
+ chuang3
222
+ chuang4
223
+ chui1
224
+ chui2
225
+ chun1
226
+ chun2
227
+ chun3
228
+ chuo1
229
+ chuo4
230
+ ci1
231
+ ci2
232
+ ci3
233
+ ci4
234
+ cong1
235
+ cong2
236
+ cou4
237
+ cu1
238
+ cu4
239
+ cuan1
240
+ cuan2
241
+ cuan4
242
+ cui1
243
+ cui3
244
+ cui4
245
+ cun1
246
+ cun2
247
+ cun4
248
+ cuo1
249
+ cuo2
250
+ cuo4
251
+ d
252
+ da
253
+ da1
254
+ da2
255
+ da3
256
+ da4
257
+ dai1
258
+ dai2
259
+ dai3
260
+ dai4
261
+ dan1
262
+ dan2
263
+ dan3
264
+ dan4
265
+ dang1
266
+ dang2
267
+ dang3
268
+ dang4
269
+ dao1
270
+ dao2
271
+ dao3
272
+ dao4
273
+ de
274
+ de1
275
+ de2
276
+ dei3
277
+ den4
278
+ deng1
279
+ deng2
280
+ deng3
281
+ deng4
282
+ di1
283
+ di2
284
+ di3
285
+ di4
286
+ dia3
287
+ dian1
288
+ dian2
289
+ dian3
290
+ dian4
291
+ diao1
292
+ diao3
293
+ diao4
294
+ die1
295
+ die2
296
+ die4
297
+ ding1
298
+ ding2
299
+ ding3
300
+ ding4
301
+ diu1
302
+ dong1
303
+ dong3
304
+ dong4
305
+ dou1
306
+ dou2
307
+ dou3
308
+ dou4
309
+ du1
310
+ du2
311
+ du3
312
+ du4
313
+ duan1
314
+ duan2
315
+ duan3
316
+ duan4
317
+ dui1
318
+ dui4
319
+ dun1
320
+ dun3
321
+ dun4
322
+ duo1
323
+ duo2
324
+ duo3
325
+ duo4
326
+ e
327
+ e1
328
+ e2
329
+ e3
330
+ e4
331
+ ei2
332
+ en1
333
+ en4
334
+ er
335
+ er2
336
+ er3
337
+ er4
338
+ f
339
+ fa1
340
+ fa2
341
+ fa3
342
+ fa4
343
+ fan1
344
+ fan2
345
+ fan3
346
+ fan4
347
+ fang1
348
+ fang2
349
+ fang3
350
+ fang4
351
+ fei1
352
+ fei2
353
+ fei3
354
+ fei4
355
+ fen1
356
+ fen2
357
+ fen3
358
+ fen4
359
+ feng1
360
+ feng2
361
+ feng3
362
+ feng4
363
+ fo2
364
+ fou2
365
+ fou3
366
+ fu1
367
+ fu2
368
+ fu3
369
+ fu4
370
+ g
371
+ ga1
372
+ ga2
373
+ ga3
374
+ ga4
375
+ gai1
376
+ gai2
377
+ gai3
378
+ gai4
379
+ gan1
380
+ gan2
381
+ gan3
382
+ gan4
383
+ gang1
384
+ gang2
385
+ gang3
386
+ gang4
387
+ gao1
388
+ gao2
389
+ gao3
390
+ gao4
391
+ ge1
392
+ ge2
393
+ ge3
394
+ ge4
395
+ gei2
396
+ gei3
397
+ gen1
398
+ gen2
399
+ gen3
400
+ gen4
401
+ geng1
402
+ geng3
403
+ geng4
404
+ gong1
405
+ gong3
406
+ gong4
407
+ gou1
408
+ gou2
409
+ gou3
410
+ gou4
411
+ gu
412
+ gu1
413
+ gu2
414
+ gu3
415
+ gu4
416
+ gua1
417
+ gua2
418
+ gua3
419
+ gua4
420
+ guai1
421
+ guai2
422
+ guai3
423
+ guai4
424
+ guan1
425
+ guan2
426
+ guan3
427
+ guan4
428
+ guang1
429
+ guang2
430
+ guang3
431
+ guang4
432
+ gui1
433
+ gui2
434
+ gui3
435
+ gui4
436
+ gun3
437
+ gun4
438
+ guo1
439
+ guo2
440
+ guo3
441
+ guo4
442
+ h
443
+ ha1
444
+ ha2
445
+ ha3
446
+ hai1
447
+ hai2
448
+ hai3
449
+ hai4
450
+ han1
451
+ han2
452
+ han3
453
+ han4
454
+ hang1
455
+ hang2
456
+ hang4
457
+ hao1
458
+ hao2
459
+ hao3
460
+ hao4
461
+ he1
462
+ he2
463
+ he4
464
+ hei1
465
+ hen2
466
+ hen3
467
+ hen4
468
+ heng1
469
+ heng2
470
+ heng4
471
+ hong1
472
+ hong2
473
+ hong3
474
+ hong4
475
+ hou1
476
+ hou2
477
+ hou3
478
+ hou4
479
+ hu1
480
+ hu2
481
+ hu3
482
+ hu4
483
+ hua1
484
+ hua2
485
+ hua4
486
+ huai2
487
+ huai4
488
+ huan1
489
+ huan2
490
+ huan3
491
+ huan4
492
+ huang1
493
+ huang2
494
+ huang3
495
+ huang4
496
+ hui1
497
+ hui2
498
+ hui3
499
+ hui4
500
+ hun1
501
+ hun2
502
+ hun4
503
+ huo
504
+ huo1
505
+ huo2
506
+ huo3
507
+ huo4
508
+ i
509
+ j
510
+ ji1
511
+ ji2
512
+ ji3
513
+ ji4
514
+ jia
515
+ jia1
516
+ jia2
517
+ jia3
518
+ jia4
519
+ jian1
520
+ jian2
521
+ jian3
522
+ jian4
523
+ jiang1
524
+ jiang2
525
+ jiang3
526
+ jiang4
527
+ jiao1
528
+ jiao2
529
+ jiao3
530
+ jiao4
531
+ jie1
532
+ jie2
533
+ jie3
534
+ jie4
535
+ jin1
536
+ jin2
537
+ jin3
538
+ jin4
539
+ jing1
540
+ jing2
541
+ jing3
542
+ jing4
543
+ jiong3
544
+ jiu1
545
+ jiu2
546
+ jiu3
547
+ jiu4
548
+ ju1
549
+ ju2
550
+ ju3
551
+ ju4
552
+ juan1
553
+ juan2
554
+ juan3
555
+ juan4
556
+ jue1
557
+ jue2
558
+ jue4
559
+ jun1
560
+ jun4
561
+ k
562
+ ka1
563
+ ka2
564
+ ka3
565
+ kai1
566
+ kai2
567
+ kai3
568
+ kai4
569
+ kan1
570
+ kan2
571
+ kan3
572
+ kan4
573
+ kang1
574
+ kang2
575
+ kang4
576
+ kao1
577
+ kao2
578
+ kao3
579
+ kao4
580
+ ke1
581
+ ke2
582
+ ke3
583
+ ke4
584
+ ken3
585
+ keng1
586
+ kong1
587
+ kong3
588
+ kong4
589
+ kou1
590
+ kou2
591
+ kou3
592
+ kou4
593
+ ku1
594
+ ku2
595
+ ku3
596
+ ku4
597
+ kua1
598
+ kua3
599
+ kua4
600
+ kuai3
601
+ kuai4
602
+ kuan1
603
+ kuan2
604
+ kuan3
605
+ kuang1
606
+ kuang2
607
+ kuang4
608
+ kui1
609
+ kui2
610
+ kui3
611
+ kui4
612
+ kun1
613
+ kun3
614
+ kun4
615
+ kuo4
616
+ l
617
+ la
618
+ la1
619
+ la2
620
+ la3
621
+ la4
622
+ lai2
623
+ lai4
624
+ lan2
625
+ lan3
626
+ lan4
627
+ lang1
628
+ lang2
629
+ lang3
630
+ lang4
631
+ lao1
632
+ lao2
633
+ lao3
634
+ lao4
635
+ le
636
+ le1
637
+ le4
638
+ lei
639
+ lei1
640
+ lei2
641
+ lei3
642
+ lei4
643
+ leng1
644
+ leng2
645
+ leng3
646
+ leng4
647
+ li
648
+ li1
649
+ li2
650
+ li3
651
+ li4
652
+ lia3
653
+ lian2
654
+ lian3
655
+ lian4
656
+ liang2
657
+ liang3
658
+ liang4
659
+ liao1
660
+ liao2
661
+ liao3
662
+ liao4
663
+ lie1
664
+ lie2
665
+ lie3
666
+ lie4
667
+ lin1
668
+ lin2
669
+ lin3
670
+ lin4
671
+ ling2
672
+ ling3
673
+ ling4
674
+ liu1
675
+ liu2
676
+ liu3
677
+ liu4
678
+ long1
679
+ long2
680
+ long3
681
+ long4
682
+ lou1
683
+ lou2
684
+ lou3
685
+ lou4
686
+ lu1
687
+ lu2
688
+ lu3
689
+ lu4
690
+ luan2
691
+ luan3
692
+ luan4
693
+ lun1
694
+ lun2
695
+ lun4
696
+ luo1
697
+ luo2
698
+ luo3
699
+ luo4
700
+ lv2
701
+ lv3
702
+ lv4
703
+ lve3
704
+ lve4
705
+ m
706
+ ma
707
+ ma1
708
+ ma2
709
+ ma3
710
+ ma4
711
+ mai2
712
+ mai3
713
+ mai4
714
+ man1
715
+ man2
716
+ man3
717
+ man4
718
+ mang2
719
+ mang3
720
+ mao1
721
+ mao2
722
+ mao3
723
+ mao4
724
+ me
725
+ mei2
726
+ mei3
727
+ mei4
728
+ men
729
+ men1
730
+ men2
731
+ men4
732
+ meng
733
+ meng1
734
+ meng2
735
+ meng3
736
+ meng4
737
+ mi1
738
+ mi2
739
+ mi3
740
+ mi4
741
+ mian2
742
+ mian3
743
+ mian4
744
+ miao1
745
+ miao2
746
+ miao3
747
+ miao4
748
+ mie1
749
+ mie4
750
+ min2
751
+ min3
752
+ ming2
753
+ ming3
754
+ ming4
755
+ miu4
756
+ mo1
757
+ mo2
758
+ mo3
759
+ mo4
760
+ mou1
761
+ mou2
762
+ mou3
763
+ mu2
764
+ mu3
765
+ mu4
766
+ n
767
+ n2
768
+ na1
769
+ na2
770
+ na3
771
+ na4
772
+ nai2
773
+ nai3
774
+ nai4
775
+ nan1
776
+ nan2
777
+ nan3
778
+ nan4
779
+ nang1
780
+ nang2
781
+ nang3
782
+ nao1
783
+ nao2
784
+ nao3
785
+ nao4
786
+ ne
787
+ ne2
788
+ ne4
789
+ nei3
790
+ nei4
791
+ nen4
792
+ neng2
793
+ ni1
794
+ ni2
795
+ ni3
796
+ ni4
797
+ nian1
798
+ nian2
799
+ nian3
800
+ nian4
801
+ niang2
802
+ niang4
803
+ niao2
804
+ niao3
805
+ niao4
806
+ nie1
807
+ nie4
808
+ nin2
809
+ ning2
810
+ ning3
811
+ ning4
812
+ niu1
813
+ niu2
814
+ niu3
815
+ niu4
816
+ nong2
817
+ nong4
818
+ nou4
819
+ nu2
820
+ nu3
821
+ nu4
822
+ nuan3
823
+ nuo2
824
+ nuo4
825
+ nv2
826
+ nv3
827
+ nve4
828
+ o
829
+ o1
830
+ o2
831
+ ou1
832
+ ou2
833
+ ou3
834
+ ou4
835
+ p
836
+ pa1
837
+ pa2
838
+ pa4
839
+ pai1
840
+ pai2
841
+ pai3
842
+ pai4
843
+ pan1
844
+ pan2
845
+ pan4
846
+ pang1
847
+ pang2
848
+ pang4
849
+ pao1
850
+ pao2
851
+ pao3
852
+ pao4
853
+ pei1
854
+ pei2
855
+ pei4
856
+ pen1
857
+ pen2
858
+ pen4
859
+ peng1
860
+ peng2
861
+ peng3
862
+ peng4
863
+ pi1
864
+ pi2
865
+ pi3
866
+ pi4
867
+ pian1
868
+ pian2
869
+ pian4
870
+ piao1
871
+ piao2
872
+ piao3
873
+ piao4
874
+ pie1
875
+ pie2
876
+ pie3
877
+ pin1
878
+ pin2
879
+ pin3
880
+ pin4
881
+ ping1
882
+ ping2
883
+ po1
884
+ po2
885
+ po3
886
+ po4
887
+ pou1
888
+ pu1
889
+ pu2
890
+ pu3
891
+ pu4
892
+ q
893
+ qi1
894
+ qi2
895
+ qi3
896
+ qi4
897
+ qia1
898
+ qia3
899
+ qia4
900
+ qian1
901
+ qian2
902
+ qian3
903
+ qian4
904
+ qiang1
905
+ qiang2
906
+ qiang3
907
+ qiang4
908
+ qiao1
909
+ qiao2
910
+ qiao3
911
+ qiao4
912
+ qie1
913
+ qie2
914
+ qie3
915
+ qie4
916
+ qin1
917
+ qin2
918
+ qin3
919
+ qin4
920
+ qing1
921
+ qing2
922
+ qing3
923
+ qing4
924
+ qiong1
925
+ qiong2
926
+ qiu1
927
+ qiu2
928
+ qiu3
929
+ qu1
930
+ qu2
931
+ qu3
932
+ qu4
933
+ quan1
934
+ quan2
935
+ quan3
936
+ quan4
937
+ que1
938
+ que2
939
+ que4
940
+ qun2
941
+ r
942
+ ran2
943
+ ran3
944
+ rang1
945
+ rang2
946
+ rang3
947
+ rang4
948
+ rao2
949
+ rao3
950
+ rao4
951
+ re2
952
+ re3
953
+ re4
954
+ ren2
955
+ ren3
956
+ ren4
957
+ reng1
958
+ reng2
959
+ ri4
960
+ rong1
961
+ rong2
962
+ rong3
963
+ rou2
964
+ rou4
965
+ ru2
966
+ ru3
967
+ ru4
968
+ ruan2
969
+ ruan3
970
+ rui3
971
+ rui4
972
+ run4
973
+ ruo4
974
+ s
975
+ sa1
976
+ sa2
977
+ sa3
978
+ sa4
979
+ sai1
980
+ sai4
981
+ san1
982
+ san2
983
+ san3
984
+ san4
985
+ sang1
986
+ sang3
987
+ sang4
988
+ sao1
989
+ sao2
990
+ sao3
991
+ sao4
992
+ se4
993
+ sen1
994
+ seng1
995
+ sha1
996
+ sha2
997
+ sha3
998
+ sha4
999
+ shai1
1000
+ shai2
1001
+ shai3
1002
+ shai4
1003
+ shan1
1004
+ shan3
1005
+ shan4
1006
+ shang
1007
+ shang1
1008
+ shang3
1009
+ shang4
1010
+ shao1
1011
+ shao2
1012
+ shao3
1013
+ shao4
1014
+ she1
1015
+ she2
1016
+ she3
1017
+ she4
1018
+ shei2
1019
+ shen1
1020
+ shen2
1021
+ shen3
1022
+ shen4
1023
+ sheng1
1024
+ sheng2
1025
+ sheng3
1026
+ sheng4
1027
+ shi
1028
+ shi1
1029
+ shi2
1030
+ shi3
1031
+ shi4
1032
+ shou1
1033
+ shou2
1034
+ shou3
1035
+ shou4
1036
+ shu1
1037
+ shu2
1038
+ shu3
1039
+ shu4
1040
+ shua1
1041
+ shua2
1042
+ shua3
1043
+ shua4
1044
+ shuai1
1045
+ shuai3
1046
+ shuai4
1047
+ shuan1
1048
+ shuan4
1049
+ shuang1
1050
+ shuang3
1051
+ shui2
1052
+ shui3
1053
+ shui4
1054
+ shun3
1055
+ shun4
1056
+ shuo1
1057
+ shuo4
1058
+ si1
1059
+ si2
1060
+ si3
1061
+ si4
1062
+ song1
1063
+ song3
1064
+ song4
1065
+ sou1
1066
+ sou3
1067
+ sou4
1068
+ su1
1069
+ su2
1070
+ su4
1071
+ suan1
1072
+ suan4
1073
+ sui1
1074
+ sui2
1075
+ sui3
1076
+ sui4
1077
+ sun1
1078
+ sun3
1079
+ suo
1080
+ suo1
1081
+ suo2
1082
+ suo3
1083
+ t
1084
+ ta1
1085
+ ta2
1086
+ ta3
1087
+ ta4
1088
+ tai1
1089
+ tai2
1090
+ tai4
1091
+ tan1
1092
+ tan2
1093
+ tan3
1094
+ tan4
1095
+ tang1
1096
+ tang2
1097
+ tang3
1098
+ tang4
1099
+ tao1
1100
+ tao2
1101
+ tao3
1102
+ tao4
1103
+ te4
1104
+ teng2
1105
+ ti1
1106
+ ti2
1107
+ ti3
1108
+ ti4
1109
+ tian1
1110
+ tian2
1111
+ tian3
1112
+ tiao1
1113
+ tiao2
1114
+ tiao3
1115
+ tiao4
1116
+ tie1
1117
+ tie2
1118
+ tie3
1119
+ tie4
1120
+ ting1
1121
+ ting2
1122
+ ting3
1123
+ tong1
1124
+ tong2
1125
+ tong3
1126
+ tong4
1127
+ tou
1128
+ tou1
1129
+ tou2
1130
+ tou4
1131
+ tu1
1132
+ tu2
1133
+ tu3
1134
+ tu4
1135
+ tuan1
1136
+ tuan2
1137
+ tui1
1138
+ tui2
1139
+ tui3
1140
+ tui4
1141
+ tun1
1142
+ tun2
1143
+ tun4
1144
+ tuo1
1145
+ tuo2
1146
+ tuo3
1147
+ tuo4
1148
+ u
1149
+ v
1150
+ w
1151
+ wa
1152
+ wa1
1153
+ wa2
1154
+ wa3
1155
+ wa4
1156
+ wai1
1157
+ wai3
1158
+ wai4
1159
+ wan1
1160
+ wan2
1161
+ wan3
1162
+ wan4
1163
+ wang1
1164
+ wang2
1165
+ wang3
1166
+ wang4
1167
+ wei1
1168
+ wei2
1169
+ wei3
1170
+ wei4
1171
+ wen1
1172
+ wen2
1173
+ wen3
1174
+ wen4
1175
+ weng1
1176
+ weng4
1177
+ wo1
1178
+ wo2
1179
+ wo3
1180
+ wo4
1181
+ wu1
1182
+ wu2
1183
+ wu3
1184
+ wu4
1185
+ x
1186
+ xi1
1187
+ xi2
1188
+ xi3
1189
+ xi4
1190
+ xia1
1191
+ xia2
1192
+ xia4
1193
+ xian1
1194
+ xian2
1195
+ xian3
1196
+ xian4
1197
+ xiang1
1198
+ xiang2
1199
+ xiang3
1200
+ xiang4
1201
+ xiao1
1202
+ xiao2
1203
+ xiao3
1204
+ xiao4
1205
+ xie1
1206
+ xie2
1207
+ xie3
1208
+ xie4
1209
+ xin1
1210
+ xin2
1211
+ xin4
1212
+ xing1
1213
+ xing2
1214
+ xing3
1215
+ xing4
1216
+ xiong1
1217
+ xiong2
1218
+ xiu1
1219
+ xiu3
1220
+ xiu4
1221
+ xu
1222
+ xu1
1223
+ xu2
1224
+ xu3
1225
+ xu4
1226
+ xuan1
1227
+ xuan2
1228
+ xuan3
1229
+ xuan4
1230
+ xue1
1231
+ xue2
1232
+ xue3
1233
+ xue4
1234
+ xun1
1235
+ xun2
1236
+ xun4
1237
+ y
1238
+ ya
1239
+ ya1
1240
+ ya2
1241
+ ya3
1242
+ ya4
1243
+ yan1
1244
+ yan2
1245
+ yan3
1246
+ yan4
1247
+ yang1
1248
+ yang2
1249
+ yang3
1250
+ yang4
1251
+ yao1
1252
+ yao2
1253
+ yao3
1254
+ yao4
1255
+ ye1
1256
+ ye2
1257
+ ye3
1258
+ ye4
1259
+ yi
1260
+ yi1
1261
+ yi2
1262
+ yi3
1263
+ yi4
1264
+ yin1
1265
+ yin2
1266
+ yin3
1267
+ yin4
1268
+ ying1
1269
+ ying2
1270
+ ying3
1271
+ ying4
1272
+ yo1
1273
+ yong1
1274
+ yong2
1275
+ yong3
1276
+ yong4
1277
+ you1
1278
+ you2
1279
+ you3
1280
+ you4
1281
+ yu1
1282
+ yu2
1283
+ yu3
1284
+ yu4
1285
+ yuan1
1286
+ yuan2
1287
+ yuan3
1288
+ yuan4
1289
+ yue1
1290
+ yue4
1291
+ yun1
1292
+ yun2
1293
+ yun3
1294
+ yun4
1295
+ z
1296
+ za1
1297
+ za2
1298
+ za3
1299
+ zai1
1300
+ zai3
1301
+ zai4
1302
+ zan1
1303
+ zan2
1304
+ zan3
1305
+ zan4
1306
+ zang1
1307
+ zang4
1308
+ zao1
1309
+ zao2
1310
+ zao3
1311
+ zao4
1312
+ ze2
1313
+ ze4
1314
+ zei2
1315
+ zen3
1316
+ zeng1
1317
+ zeng4
1318
+ zha1
1319
+ zha2
1320
+ zha3
1321
+ zha4
1322
+ zhai1
1323
+ zhai2
1324
+ zhai3
1325
+ zhai4
1326
+ zhan1
1327
+ zhan2
1328
+ zhan3
1329
+ zhan4
1330
+ zhang1
1331
+ zhang2
1332
+ zhang3
1333
+ zhang4
1334
+ zhao1
1335
+ zhao2
1336
+ zhao3
1337
+ zhao4
1338
+ zhe
1339
+ zhe1
1340
+ zhe2
1341
+ zhe3
1342
+ zhe4
1343
+ zhen1
1344
+ zhen2
1345
+ zhen3
1346
+ zhen4
1347
+ zheng1
1348
+ zheng2
1349
+ zheng3
1350
+ zheng4
1351
+ zhi1
1352
+ zhi2
1353
+ zhi3
1354
+ zhi4
1355
+ zhong1
1356
+ zhong2
1357
+ zhong3
1358
+ zhong4
1359
+ zhou1
1360
+ zhou2
1361
+ zhou3
1362
+ zhou4
1363
+ zhu1
1364
+ zhu2
1365
+ zhu3
1366
+ zhu4
1367
+ zhua1
1368
+ zhua2
1369
+ zhua3
1370
+ zhuai1
1371
+ zhuai3
1372
+ zhuai4
1373
+ zhuan1
1374
+ zhuan2
1375
+ zhuan3
1376
+ zhuan4
1377
+ zhuang1
1378
+ zhuang4
1379
+ zhui1
1380
+ zhui4
1381
+ zhun1
1382
+ zhun2
1383
+ zhun3
1384
+ zhuo1
1385
+ zhuo2
1386
+ zi
1387
+ zi1
1388
+ zi2
1389
+ zi3
1390
+ zi4
1391
+ zong1
1392
+ zong2
1393
+ zong3
1394
+ zong4
1395
+ zou1
1396
+ zou2
1397
+ zou3
1398
+ zou4
1399
+ zu1
1400
+ zu2
1401
+ zu3
1402
+ zuan1
1403
+ zuan3
1404
+ zuan4
1405
+ zui2
1406
+ zui3
1407
+ zui4
1408
+ zun1
1409
+ zuo
1410
+ zuo1
1411
+ zuo2
1412
+ zuo3
1413
+ zuo4
1414
+ {
1415
+ ~
1416
+ ¡
1417
+ ¢
1418
+ £
1419
+ ¥
1420
+ §
1421
+ ¨
1422
+ ©
1423
+ «
1424
+ ®
1425
+ ¯
1426
+ °
1427
+ ±
1428
+ ²
1429
+ ³
1430
+ ´
1431
+ µ
1432
+ ·
1433
+ ¹
1434
+ º
1435
+ »
1436
+ ¼
1437
+ ½
1438
+ ¾
1439
+ ¿
1440
+ À
1441
+ Á
1442
+ Â
1443
+ Ã
1444
+ Ä
1445
+ Å
1446
+ Æ
1447
+ Ç
1448
+ È
1449
+ É
1450
+ Ê
1451
+ Í
1452
+ Î
1453
+ Ñ
1454
+ Ó
1455
+ Ö
1456
+ ×
1457
+ Ø
1458
+ Ú
1459
+ Ü
1460
+ Ý
1461
+ Þ
1462
+ ß
1463
+ à
1464
+ á
1465
+ â
1466
+ ã
1467
+ ä
1468
+ å
1469
+ æ
1470
+ ç
1471
+ è
1472
+ é
1473
+ ê
1474
+ ë
1475
+ ì
1476
+ í
1477
+ î
1478
+ ï
1479
+ ð
1480
+ ñ
1481
+ ò
1482
+ ó
1483
+ ô
1484
+ õ
1485
+ ö
1486
+ ø
1487
+ ù
1488
+ ú
1489
+ û
1490
+ ü
1491
+ ý
1492
+ Ā
1493
+ ā
1494
+ ă
1495
+ ą
1496
+ ć
1497
+ Č
1498
+ č
1499
+ Đ
1500
+ đ
1501
+ ē
1502
+ ė
1503
+ ę
1504
+ ě
1505
+ ĝ
1506
+ ğ
1507
+ ħ
1508
+ ī
1509
+ į
1510
+ İ
1511
+ ı
1512
+ Ł
1513
+ ł
1514
+ ń
1515
+ ņ
1516
+ ň
1517
+ ŋ
1518
+ Ō
1519
+ ō
1520
+ ő
1521
+ œ
1522
+ ř
1523
+ Ś
1524
+ ś
1525
+ Ş
1526
+ ş
1527
+ Š
1528
+ š
1529
+ Ť
1530
+ ť
1531
+ ũ
1532
+ ū
1533
+ ź
1534
+ Ż
1535
+ ż
1536
+ Ž
1537
+ ž
1538
+ ơ
1539
+ ư
1540
+ ǎ
1541
+ ǐ
1542
+ ǒ
1543
+ ǔ
1544
+ ǚ
1545
+ ș
1546
+ ț
1547
+ ɑ
1548
+ ɔ
1549
+ ɕ
1550
+ ə
1551
+ ɛ
1552
+ ɜ
1553
+ ɡ
1554
+ ɣ
1555
+ ɪ
1556
+ ɫ
1557
+ ɴ
1558
+ ɹ
1559
+ ɾ
1560
+ ʃ
1561
+ ʊ
1562
+ ʌ
1563
+ ʒ
1564
+ ʔ
1565
+ ʰ
1566
+ ʷ
1567
+ ʻ
1568
+ ʾ
1569
+ ʿ
1570
+ ˈ
1571
+ ː
1572
+ ˙
1573
+ ˜
1574
+ ˢ
1575
+ ́
1576
+ ̅
1577
+ Α
1578
+ Β
1579
+ Δ
1580
+ Ε
1581
+ Θ
1582
+ Κ
1583
+ Λ
1584
+ Μ
1585
+ Ξ
1586
+ Π
1587
+ Σ
1588
+ Τ
1589
+ Φ
1590
+ Χ
1591
+ Ψ
1592
+ Ω
1593
+ ά
1594
+ έ
1595
+ ή
1596
+ ί
1597
+ α
1598
+ β
1599
+ γ
1600
+ δ
1601
+ ε
1602
+ ζ
1603
+ η
1604
+ θ
1605
+ ι
1606
+ κ
1607
+ λ
1608
+ μ
1609
+ ν
1610
+ ξ
1611
+ ο
1612
+ π
1613
+ ρ
1614
+ ς
1615
+ σ
1616
+ τ
1617
+ υ
1618
+ φ
1619
+ χ
1620
+ ψ
1621
+ ω
1622
+ ϊ
1623
+ ό
1624
+ ύ
1625
+ ώ
1626
+ ϕ
1627
+ ϵ
1628
+ Ё
1629
+ А
1630
+ Б
1631
+ В
1632
+ Г
1633
+ Д
1634
+ Е
1635
+ Ж
1636
+ З
1637
+ И
1638
+ Й
1639
+ К
1640
+ Л
1641
+ М
1642
+ Н
1643
+ О
1644
+ П
1645
+ Р
1646
+ С
1647
+ Т
1648
+ У
1649
+ Ф
1650
+ Х
1651
+ Ц
1652
+ Ч
1653
+ Ш
1654
+ Щ
1655
+ Ы
1656
+ Ь
1657
+ Э
1658
+ Ю
1659
+ Я
1660
+ а
1661
+ б
1662
+ в
1663
+ г
1664
+ д
1665
+ е
1666
+ ж
1667
+ з
1668
+ и
1669
+ й
1670
+ к
1671
+ л
1672
+ м
1673
+ н
1674
+ о
1675
+ п
1676
+ р
1677
+ с
1678
+ т
1679
+ у
1680
+ ф
1681
+ х
1682
+ ц
1683
+ ч
1684
+ ш
1685
+ щ
1686
+ ъ
1687
+ ы
1688
+ ь
1689
+ э
1690
+ ю
1691
+ я
1692
+ ё
1693
+ і
1694
+ ְ
1695
+ ִ
1696
+ ֵ
1697
+ ֶ
1698
+ ַ
1699
+ ָ
1700
+ ֹ
1701
+ ּ
1702
+ ־
1703
+ ׁ
1704
+ א
1705
+ ב
1706
+ ג
1707
+ ד
1708
+ ה
1709
+ ו
1710
+ ז
1711
+ ח
1712
+ ט
1713
+ י
1714
+ כ
1715
+ ל
1716
+ ם
1717
+ מ
1718
+ ן
1719
+ נ
1720
+ ס
1721
+ ע
1722
+ פ
1723
+ ק
1724
+ ר
1725
+ ש
1726
+ ת
1727
+ أ
1728
+ ب
1729
+ ة
1730
+ ت
1731
+ ج
1732
+ ح
1733
+ د
1734
+ ر
1735
+ ز
1736
+ س
1737
+ ص
1738
+ ط
1739
+ ع
1740
+ ق
1741
+ ك
1742
+ ل
1743
+ م
1744
+ ن
1745
+ ه
1746
+ و
1747
+ ي
1748
+ َ
1749
+ ُ
1750
+ ِ
1751
+ ْ
1752
+
1753
+
1754
+
1755
+
1756
+
1757
+
1758
+
1759
+
1760
+
1761
+
1762
+
1763
+
1764
+
1765
+
1766
+
1767
+
1768
+
1769
+
1770
+
1771
+
1772
+
1773
+
1774
+
1775
+
1776
+
1777
+
1778
+
1779
+
1780
+
1781
+
1782
+
1783
+
1784
+
1785
+
1786
+
1787
+
1788
+
1789
+
1790
+
1791
+
1792
+
1793
+
1794
+
1795
+
1796
+
1797
+
1798
+
1799
+
1800
+ ế
1801
+
1802
+
1803
+
1804
+
1805
+
1806
+
1807
+
1808
+
1809
+
1810
+
1811
+
1812
+
1813
+
1814
+
1815
+
1816
+
1817
+
1818
+
1819
+
1820
+
1821
+
1822
+
1823
+
1824
+
1825
+
1826
+
1827
+
1828
+
1829
+
1830
+ ���
1831
+
1832
+
1833
+
1834
+
1835
+
1836
+
1837
+
1838
+
1839
+
1840
+
1841
+
1842
+
1843
+
1844
+
1845
+
1846
+
1847
+
1848
+
1849
+
1850
+
1851
+
1852
+
1853
+
1854
+
1855
+
1856
+
1857
+
1858
+
1859
+
1860
+
1861
+
1862
+
1863
+
1864
+
1865
+
1866
+
1867
+
1868
+
1869
+
1870
+
1871
+
1872
+
1873
+
1874
+
1875
+
1876
+
1877
+
1878
+
1879
+
1880
+
1881
+
1882
+
1883
+
1884
+
1885
+
1886
+
1887
+
1888
+
1889
+
1890
+
1891
+
1892
+
1893
+
1894
+
1895
+
1896
+
1897
+
1898
+
1899
+
1900
+
1901
+
1902
+
1903
+
1904
+
1905
+
1906
+
1907
+
1908
+
1909
+
1910
+
1911
+
1912
+
1913
+
1914
+
1915
+
1916
+
1917
+
1918
+
1919
+
1920
+
1921
+
1922
+
1923
+
1924
+
1925
+
1926
+
1927
+
1928
+
1929
+
1930
+
1931
+
1932
+
1933
+
1934
+
1935
+
1936
+
1937
+
1938
+
1939
+
1940
+
1941
+
1942
+
1943
+
1944
+
1945
+
1946
+
1947
+
1948
+
1949
+
1950
+
1951
+
1952
+
1953
+
1954
+
1955
+
1956
+
1957
+
1958
+
1959
+
1960
+
1961
+
1962
+
1963
+
1964
+
1965
+
1966
+
1967
+
1968
+
1969
+
1970
+
1971
+
1972
+
1973
+
1974
+
1975
+
1976
+
1977
+
1978
+
1979
+
1980
+
1981
+
1982
+
1983
+
1984
+
1985
+
1986
+
1987
+
1988
+
1989
+
1990
+
1991
+
1992
+
1993
+
1994
+
1995
+
1996
+
1997
+
1998
+
1999
+
2000
+
2001
+
2002
+
2003
+
2004
+
2005
+
2006
+
2007
+
2008
+
2009
+
2010
+
2011
+
2012
+
2013
+
2014
+
2015
+
2016
+
2017
+
2018
+
2019
+
2020
+
2021
+
2022
+
2023
+
2024
+
2025
+
2026
+
2027
+
2028
+
2029
+
2030
+
2031
+
2032
+
2033
+
2034
+
2035
+
2036
+
2037
+
2038
+
2039
+
2040
+
2041
+
2042
+
2043
+
2044
+
2045
+
2046
+
2047
+
2048
+
2049
+
2050
+
2051
+
2052
+
2053
+
2054
+
2055
+
2056
+
2057
+
2058
+
2059
+
2060
+
2061
+
2062
+
2063
+
2064
+
2065
+
2066
+
2067
+
2068
+
2069
+
2070
+
2071
+
2072
+
2073
+
2074
+
2075
+
2076
+
2077
+
2078
+
2079
+
2080
+
2081
+
2082
+
2083
+
2084
+
2085
+
2086
+
2087
+
2088
+
2089
+
2090
+
2091
+
2092
+
2093
+
2094
+
2095
+
2096
+
2097
+
2098
+
2099
+
2100
+
2101
+
2102
+
2103
+
2104
+
2105
+
2106
+
2107
+
2108
+
2109
+
2110
+
2111
+
2112
+
2113
+
2114
+
2115
+
2116
+
2117
+
2118
+
2119
+
2120
+
2121
+
2122
+
2123
+
2124
+
2125
+
2126
+
2127
+
2128
+
2129
+
2130
+
2131
+
2132
+
2133
+
2134
+
2135
+
2136
+
2137
+
2138
+
2139
+
2140
+
2141
+
2142
+
2143
+
2144
+
2145
+
2146
+
2147
+
2148
+
2149
+
2150
+
2151
+
2152
+
2153
+
2154
+
2155
+
2156
+
2157
+
2158
+
2159
+
2160
+
2161
+
2162
+
2163
+
2164
+
2165
+
2166
+
2167
+
2168
+
2169
+
2170
+
2171
+
2172
+
2173
+
2174
+
2175
+
2176
+
2177
+
2178
+
2179
+
2180
+
2181
+
2182
+
2183
+
2184
+
2185
+
2186
+
2187
+
2188
+
2189
+
2190
+
2191
+
2192
+
2193
+
2194
+
2195
+
2196
+
2197
+
2198
+
2199
+
2200
+
2201
+
2202
+
2203
+
2204
+
2205
+
2206
+
2207
+
2208
+
2209
+
2210
+
2211
+
2212
+
2213
+
2214
+
2215
+
2216
+
2217
+
2218
+
2219
+
2220
+
2221
+
2222
+
2223
+
2224
+
2225
+
2226
+
2227
+
2228
+
2229
+
2230
+
2231
+
2232
+
2233
+
2234
+
2235
+
2236
+
2237
+
2238
+
2239
+
2240
+
2241
+
2242
+
2243
+
2244
+
2245
+
2246
+
2247
+
2248
+
2249
+
2250
+
2251
+
2252
+
2253
+
2254
+
2255
+
2256
+
2257
+
2258
+
2259
+
2260
+
2261
+
2262
+
2263
+
2264
+
2265
+
2266
+
2267
+
2268
+
2269
+
2270
+
2271
+
2272
+
2273
+
2274
+
2275
+
2276
+
2277
+
2278
+
2279
+
2280
+
2281
+
2282
+
2283
+
2284
+
2285
+
2286
+
2287
+
2288
+
2289
+
2290
+
2291
+
2292
+
2293
+
2294
+
2295
+
2296
+
2297
+
2298
+
2299
+
2300
+
2301
+
2302
+
2303
+
2304
+
2305
+
2306
+
2307
+
2308
+
2309
+
2310
+
2311
+
2312
+
2313
+
2314
+
2315
+
2316
+
2317
+
2318
+
2319
+
2320
+
2321
+
2322
+
2323
+
2324
+
2325
+
2326
+
2327
+
2328
+
2329
+
2330
+
2331
+
2332
+
2333
+
2334
+
2335
+
2336
+
2337
+
2338
+
2339
+
2340
+
2341
+
2342
+
2343
+
2344
+
2345
+
2346
+
2347
+
2348
+
2349
+
2350
+
2351
+
2352
+
2353
+
2354
+
2355
+
2356
+
2357
+
2358
+
2359
+
2360
+
2361
+
2362
+
2363
+
2364
+
2365
+
2366
+
2367
+
2368
+
2369
+
2370
+
2371
+
2372
+
2373
+
2374
+
2375
+
2376
+
2377
+
2378
+
2379
+
2380
+
2381
+
2382
+
2383
+
2384
+
2385
+
2386
+
2387
+
2388
+
2389
+
2390
+
2391
+
2392
+
2393
+
2394
+
2395
+
2396
+
2397
+
2398
+
2399
+
2400
+
2401
+
2402
+
2403
+
2404
+
2405
+
2406
+
2407
+
2408
+
2409
+
2410
+
2411
+
2412
+
2413
+
2414
+
2415
+
2416
+
2417
+
2418
+
2419
+
2420
+
2421
+
2422
+
2423
+
2424
+
2425
+
2426
+
2427
+
2428
+
2429
+
2430
+
2431
+
2432
+
2433
+
2434
+
2435
+
2436
+
2437
+
2438
+
2439
+
2440
+
2441
+
2442
+
2443
+
2444
+
2445
+
2446
+
2447
+
2448
+
2449
+
2450
+
2451
+
2452
+
2453
+
2454
+
2455
+
2456
+
2457
+
2458
+
2459
+
2460
+
2461
+
2462
+
2463
+
2464
+
2465
+
2466
+
2467
+
2468
+
2469
+
2470
+
2471
+
2472
+
2473
+
2474
+
2475
+
2476
+
2477
+
2478
+
2479
+
2480
+
2481
+
2482
+
2483
+
2484
+
2485
+
2486
+
2487
+
2488
+
2489
+
2490
+
2491
+
2492
+
2493
+
2494
+
2495
+
2496
+
2497
+
2498
+
2499
+
2500
+
2501
+
2502
+
2503
+
2504
+
2505
+
2506
+
2507
+
2508
+
2509
+
2510
+
2511
+
2512
+
2513
+
2514
+
2515
+
2516
+
2517
+
2518
+
2519
+
2520
+
2521
+
2522
+
2523
+
2524
+
2525
+
2526
+
2527
+
2528
+
2529
+
2530
+
2531
+
2532
+
2533
+
2534
+
2535
+
2536
+
2537
+
2538
+
2539
+
2540
+
2541
+
2542
+
2543
+
2544
+
2545
+
2546
+
2547
+
2548
+ ĩ
2549
+
2550
+
2551
+
2552
+
2553
+
2554
+
2555
+
2556
+
2557
+
2558
+
2559
+
2560
+
2561
+
2562
+
2563
+
2564
+
2565
+
2566
+
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
- torch==1.13.1
2
- torchvision==0.14.1
3
- torchaudio==0.13.1
4
- numpy==1.23.5
5
  face_alignment==1.3.0
6
  imageio==2.19.3
7
  imageio-ffmpeg==0.4.7
@@ -21,6 +21,7 @@ facexlib
21
  dlib-bin
22
  gfpgan
23
  av
 
24
  safetensors
25
  gradio
26
  soundfile
@@ -42,4 +43,5 @@ torchdiffeq
42
  transformers_stream_generator
43
  vocos
44
  wandb
45
- x_transformers>=1.31.14
 
 
1
+ torch==2.4.0
2
+ torchaudio==2.4.0
3
+ torchvision==0.19.0
4
+
5
  face_alignment==1.3.0
6
  imageio==2.19.3
7
  imageio-ffmpeg==0.4.7
 
21
  dlib-bin
22
  gfpgan
23
  av
24
+
25
  safetensors
26
  gradio
27
  soundfile
 
43
  transformers_stream_generator
44
  vocos
45
  wandb
46
+ x_transformers>=1.31.14
47
+ f5-tts
src/face3d/models/arcface_torch/inference.py CHANGED
@@ -20,8 +20,15 @@ def inference(weight, name, img):
20
  img = torch.from_numpy(img).unsqueeze(0).float()
21
  img.div_(255).sub_(0.5).div_(0.5)
22
  net = get_model(name, fp16=False)
23
- net.load_state_dict(torch.load(weight))
 
 
 
 
 
24
  net.eval()
 
 
25
  feat = net(img).numpy()
26
  print(feat)
27
 
 
20
  img = torch.from_numpy(img).unsqueeze(0).float()
21
  img.div_(255).sub_(0.5).div_(0.5)
22
  net = get_model(name, fp16=False)
23
+ # weights_only=True is accepted by PyTorch 2.x; the TypeError fallback is for older PyTorch versions that reject the keyword, not for older checkpoints
24
+ try:
25
+ state_dict = torch.load(weight, weights_only=True)
26
+ except TypeError:
27
+ state_dict = torch.load(weight)
28
+ net.load_state_dict(state_dict)
29
  net.eval()
30
+ # Optional: For PyTorch 2.x, you can compile the model for speedup
31
+ # net = torch.compile(net)
32
  feat = net(img).numpy()
33
  print(feat)
34
 
src/face3d/models/networks.py CHANGED
@@ -59,11 +59,20 @@ def get_scheduler(optimizer, opt):
59
 
60
 
61
  def define_net_recon(net_recon, use_last_fc=False, init_path=None):
62
- return ReconNetWrapper(net_recon, use_last_fc=use_last_fc, init_path=init_path)
 
 
63
 
64
  def define_net_recog(net_recog, pretrained_path=None):
65
  net = RecogNetWrapper(net_recog=net_recog, pretrained_path=pretrained_path)
66
  net.eval()
 
 
 
 
 
 
 
67
  return net
68
 
69
  class ReconNetWrapper(nn.Module):
 
59
 
60
 
61
  def define_net_recon(net_recon, use_last_fc=False, init_path=None):
62
+ model = ReconNetWrapper(net_recon, use_last_fc=use_last_fc, init_path=init_path)
63
+ # Only compile after the state_dict has finished loading!
64
+ return model
65
 
66
  def define_net_recog(net_recog, pretrained_path=None):
67
  net = RecogNetWrapper(net_recog=net_recog, pretrained_path=pretrained_path)
68
  net.eval()
69
+ # Use torch.compile for PyTorch 2.x+ if available
70
+ try:
71
+ import torch
72
+ net = torch.compile(net)
73
+ print("[INFO] RecogNetWrapper compiled with torch.compile for PyTorch 2.x+")
74
+ except AttributeError:
75
+ print("[INFO] torch.compile not available; running RecogNetWrapper without compilation.")
76
  return net
77
 
78
  class ReconNetWrapper(nn.Module):
src/face3d/util/util.py CHANGED
@@ -10,7 +10,6 @@ import argparse
10
  from argparse import Namespace
11
  import torchvision
12
 
13
-
14
  def str2bool(v):
15
  if isinstance(v, bool):
16
  return v
 
10
  from argparse import Namespace
11
  import torchvision
12
 
 
13
  def str2bool(v):
14
  if isinstance(v, bool):
15
  return v
src/gradio_demo.py CHANGED
@@ -165,6 +165,14 @@ class SadTalker():
165
 
166
  import gc; gc.collect()
167
 
168
- return return_path
 
 
 
 
 
 
 
 
169
 
170
 
 
165
 
166
  import gc; gc.collect()
167
 
168
+ # Fix: Copy video to a temp file to avoid ffmpeg overwrite error in Gradio
169
+ import tempfile
170
+ if os.path.isfile(return_path):
171
+ tmp_video = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
172
+ tmp_video.close()
173
+ shutil.copy(return_path, tmp_video.name)
174
+ return tmp_video.name
175
+ else:
176
+ return return_path
177
 
178
 
src/test_audio2coeff.py CHANGED
@@ -14,7 +14,11 @@ from src.audio2exp_models.audio2exp import Audio2Exp
14
  from src.utils.safetensor_helper import load_x_from_safetensor
15
 
16
  def load_cpk(checkpoint_path, model=None, optimizer=None, device="cpu"):
17
- checkpoint = torch.load(checkpoint_path, map_location=torch.device(device))
 
 
 
 
18
  if model is not None:
19
  model.load_state_dict(checkpoint['model'])
20
  if optimizer is not None:
@@ -119,5 +123,3 @@ class Audio2Coeff():
119
  #### relative head pose
120
  coeffs_pred_numpy[:, 64:70] = coeffs_pred_numpy[:, 64:70] + ( refpose_coeff[:num_frames, :] - refpose_coeff[0:1, :] )
121
  return coeffs_pred_numpy
122
-
123
-
 
14
  from src.utils.safetensor_helper import load_x_from_safetensor
15
 
16
  def load_cpk(checkpoint_path, model=None, optimizer=None, device="cpu"):
17
+ # weights_only=True is accepted by PyTorch 2.x; the TypeError fallback is for older PyTorch versions that reject the keyword, not for older checkpoints
18
+ try:
19
+ checkpoint = torch.load(checkpoint_path, map_location=torch.device(device), weights_only=True)
20
+ except TypeError:
21
+ checkpoint = torch.load(checkpoint_path, map_location=torch.device(device))
22
  if model is not None:
23
  model.load_state_dict(checkpoint['model'])
24
  if optimizer is not None:
 
123
  #### relative head pose
124
  coeffs_pred_numpy[:, 64:70] = coeffs_pred_numpy[:, 64:70] + ( refpose_coeff[:num_frames, :] - refpose_coeff[0:1, :] )
125
  return coeffs_pred_numpy
 
 
src/utils/model2safetensor.py CHANGED
@@ -44,8 +44,11 @@ def load_cpk_facevid2vid(checkpoint_path, generator=None, discriminator=None,
44
  kp_detector=None, he_estimator=None, optimizer_generator=None,
45
  optimizer_discriminator=None, optimizer_kp_detector=None,
46
  optimizer_he_estimator=None, device="cpu"):
47
-
48
- checkpoint = torch.load(checkpoint_path, map_location=torch.device(device))
 
 
 
49
  if generator is not None:
50
  generator.load_state_dict(checkpoint['generator'])
51
  if kp_detector is not None:
@@ -138,4 +141,4 @@ model = SadTalker(kp_extractor, generator, netG, audio2pose_model, net_recon)
138
  save_file(model.state_dict(), "checkpoints/SadTalker_V0.0.2_"+str(size)+".safetensors")
139
 
140
  ### test
141
- load_cpk_facevid2vid_safetensor('checkpoints/SadTalker_V0.0.2_'+str(size)+'.safetensors', kp_detector=kp_extractor, generator=generator, he_estimator=None)
 
44
  kp_detector=None, he_estimator=None, optimizer_generator=None,
45
  optimizer_discriminator=None, optimizer_kp_detector=None,
46
  optimizer_he_estimator=None, device="cpu"):
47
+ # weights_only=True is accepted by PyTorch 2.x; the TypeError fallback is for older PyTorch versions that reject the keyword, not for older checkpoints
48
+ try:
49
+ checkpoint = torch.load(checkpoint_path, map_location=torch.device(device), weights_only=True)
50
+ except TypeError:
51
+ checkpoint = torch.load(checkpoint_path, map_location=torch.device(device))
52
  if generator is not None:
53
  generator.load_state_dict(checkpoint['generator'])
54
  if kp_detector is not None:
 
141
  save_file(model.state_dict(), "checkpoints/SadTalker_V0.0.2_"+str(size)+".safetensors")
142
 
143
  ### test
144
+ load_cpk_facevid2vid_safetensor('checkpoints/SadTalker_V0.0.2_'+str(size)+'.safetensors', kp_detector=kp_extractor, generator=generator, he_estimator=None)