File size: 13,111 Bytes
873fbd2
 
707851b
c4d001b
2e5681a
707851b
c4d001b
 
 
 
 
2e5681a
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
873fbd2
 
 
 
9eb7580
873fbd2
10e10ea
 
 
 
c4d001b
 
 
 
 
 
 
067400f
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10e10ea
 
c4d001b
9cde8f9
 
 
 
 
 
 
10e10ea
 
 
 
 
c4d001b
 
9cde8f9
c4d001b
10e10ea
c4d001b
 
 
3cfc715
 
 
 
 
9cde8f9
3cfc715
 
 
 
 
 
 
9690601
10e10ea
 
9690601
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3cfc715
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5006a88
2fd4b91
 
5006a88
 
3cfc715
c4d001b
2fd4b91
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec85a52
c4d001b
 
 
 
 
9059222
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9059222
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3cfc715
c4d001b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3cfc715
 
 
 
c4d001b
 
9690601
 
 
c4d001b
 
9690601
 
 
5759558
 
9690601
 
 
5759558
 
 
9690601
 
5759558
 
9690601
 
 
5759558
c4d001b
 
3cfc715
 
c4d001b
3cfc715
 
 
707851b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
import re

import gradio as gr
import numpy as np
import spaces

from ipa import g2p
from ipa.ipa import text_to_ipa
from models import models_config


@spaces.GPU
def _do_tts(model_id, ipa, language_name, speaker_name=None, speaker_wav=None):
    """Run the synthesiser registered under ``model_id`` on ``ipa``.

    The voice is selected by ``speaker_wav`` when one is given; otherwise
    ``speaker_name`` is forwarded (it may itself be ``None``).  Sentence
    splitting is always disabled.  Returns whatever ``model.tts`` returns.

    The function is wrapped by ``spaces.GPU`` (Hugging Face Spaces decorator).
    """
    synthesizer = models_config[model_id]["model"]

    # Exactly one of the two speaker keywords is passed to ``tts``.
    if speaker_wav is None:
        voice = {"speaker_name": speaker_name}
    else:
        voice = {"speaker_wav": speaker_wav}

    return synthesizer.tts(
        ipa,
        language_name=language_name,
        split_sentences=False,
        **voice,
    )


def text_to_speech(
    model_id: str,
    use_default_emb_or_custom: str,
    speaker_wav,
    speaker: str,
    language: str,
    dialect: str,
    speed: float,
    text: str,
):
    """Synthesise ``text`` with the selected model and return playable audio.

    Args:
        model_id: Key into ``models_config``.
        use_default_emb_or_custom: ``"預設語者"`` to use the named ``speaker``;
            any other value uses ``speaker_wav`` as the voice reference.
        speaker_wav: Reference audio forwarded to ``_do_tts`` in custom mode.
        speaker: Speaker name, used only in default-speaker mode and only
            when the model's ``speaker_mapping`` is non-empty.
        language: Language name forwarded to the model; also used as the
            ``g2p`` tag when it is itself a key of ``g2p``.
        dialect: Combined with ``language`` as ``f"{language}_{dialect}"``
            to form the ``g2p`` tag when ``language`` is not a key of ``g2p``.
        speed: Value assigned to the model's ``length_scale``.
        text: Text to synthesise.

    Returns:
        ``(sample_rate, samples)`` where ``samples`` is ``np.array(wav)``.

    Raises:
        gr.Error: If ``text`` is ``None``, empty, or only whitespace.
    """
    # ``not text`` also covers None, which ``len(text)`` would turn into a
    # TypeError; whitespace-only input carries nothing to synthesise either.
    if not text or not text.strip():
        raise gr.Error("請勿輸入空字串。")

    tag = language if language in g2p else f"{language}_{dialect}"

    # Normalise: lowercase, drop sentence punctuation, and map the ASCII
    # apostrophe to the typographic one.
    text = text.lower()
    text = re.sub(r"[.?!]", "", text)
    text = text.replace("'", "’")

    ignore_comma = "gt3" not in model_id

    # This language bypasses ``text_to_ipa``: the normalised text is fed to
    # the model as-is.
    if language == "阿美_秀姑巒":
        ipa = text
    else:
        ipa = text_to_ipa(text, tag, g2p, ignore_comma)

    model_config = models_config[model_id]
    tts_model = model_config["model"].tts_model

    # NOTE(review): this mutates the shared model object, so concurrent
    # requests with different speeds could interfere — confirm acceptable.
    tts_model.length_scale = speed

    if use_default_emb_or_custom == "預設語者":
        wav = _do_tts(
            model_id,
            ipa,
            speaker_name=speaker if model_config["speaker_mapping"] else None,
            language_name=language,
        )
    else:
        # NOTE(review): if no reference audio was uploaded, ``speaker_wav`` is
        # None and ``_do_tts`` falls back to ``speaker_name=None`` — confirm
        # whether this should raise a user-facing error instead.
        wav = _do_tts(
            model_id,
            ipa,
            speaker_wav=speaker_wav,
            language_name=language,
        )

    return (
        tts_model.config.audio.sample_rate,
        np.array(wav),
    )


def when_model_selected(model_id):
    """Build UI updates for the speaker, language and speaker-type widgets.

    Args:
        model_id: Key into ``models_config`` of the newly selected model.

    Returns:
        A 3-tuple of ``gr.update`` objects, in order:

        1. speaker dropdown: choices from ``speaker_mapping``, the first
           entry's value preselected (``None`` when there are no speakers),
           interactive only when there is more than one choice;
        2. language radio: same treatment for ``language_mapping``;
        3. speaker-type radio: reset to ``"預設語者"`` and visible only when
           the model's ``speaker_encoder_model_path`` is truthy.
    """
    model_config = models_config[model_id]

    # Mapping items are used directly as the (first, second) choice tuples.
    speaker_choices = list(model_config["speaker_mapping"].items())
    language_choices = list(model_config["language_mapping"].items())

    encoder_path = model_config[
        "model"
    ].tts_model.config.model_args.speaker_encoder_model_path

    return (
        gr.update(
            choices=speaker_choices,
            value=speaker_choices[0][1] if speaker_choices else None,
            interactive=len(speaker_choices) > 1,
        ),
        gr.update(
            choices=language_choices,
            # Guarded like the speaker list so an empty mapping cannot raise
            # IndexError here.
            value=language_choices[0][1] if language_choices else None,
            interactive=len(language_choices) > 1,
        ),
        gr.update(visible=bool(encoder_path), value="預設語者"),
    )


def use_default_emb_or_custom_radio_input(use_default_emb_or_custom):
    """Toggle between the reference-audio input and the speaker dropdown.

    Returns two ``gr.update`` objects: the first is visible only when
    ``"客製化語者"`` is selected, the second only when it is not.
    """
    wants_custom_voice = use_default_emb_or_custom == "客製化語者"
    return (
        gr.update(visible=wants_custom_voice),
        gr.update(visible=not wants_custom_voice),
    )


def language_radio_changed(language):
    """Return the ``gr.update`` for the dialect radio after a language change.

    * ``"阿美_秀姑巒"``: a single visible choice ``秀姑巒``.
    * A language that is itself a key of ``g2p``: a hidden placeholder
      choice ``("None", "")``.
    * Anything else: one visible choice per ``g2p`` key containing
      ``language``, named by the part after the key's first underscore.

    The first choice is preselected, and the radio is interactive only when
    more than one choice exists.
    """
    if language == "阿美_秀姑巒":
        # Special case: takes precedence over the ``g2p`` membership test.
        options = [("秀姑巒", "秀姑巒")]
        shown = True
    elif language in g2p:
        options = [("None", "")]
        shown = False
    else:
        dialect_names = [
            tag.split("_")[1] for tag in g2p.keys() if language in tag
        ]
        options = [(name, name) for name in dialect_names]
        shown = True

    return gr.update(
        choices=options,
        value=options[0][1],
        interactive=len(options) > 1,
        visible=shown,
    )


def update_example(language):
    """Swap the example rows shown for the newly selected language.

    The first entry of ``examples.dataset.component_props`` is updated in
    place so that its ``visible``/``choices`` match the dialect options for
    ``language``.  For the languages that have example rows, a ``gr.Dataset``
    carrying those rows is returned; for any other language the function
    returns ``None``.

    Each row is ``[dialect, text, Chinese translation]``.
    """
    props = examples.dataset.component_props
    dialect_props = props[0]

    # Same three-way rule as the dialect radio: special-cased language first,
    # then languages that are direct ``g2p`` keys, then per-dialect keys.
    if language == "阿美_秀姑巒":
        dialect_props["visible"] = True
        dialect_props["choices"] = [("秀姑巒", "秀姑巒")]
    elif language in g2p:
        dialect_props["visible"] = False
        dialect_props["choices"] = [("None", "")]
    else:
        dialect_props["visible"] = True
        dialect_props["choices"] = [
            (tag.split("_")[1], tag.split("_")[1])
            for tag in g2p.keys()
            if language in tag
        ]

    # Built per call so every returned dataset gets fresh row lists.
    rows_by_language = {
        "阿美_秀姑巒": [
            [
                "秀姑巒",
                "wa:tah ko fana’ iso akong",
                "阿公好厲害",
            ],
            [
                "秀姑巒",
                "romakat kako a talapicodadan to romi’ami’ad",
                "我每天都去學校上課",
            ],
            [
                "秀姑巒",
                "o satata’akay kako itini i palapalaan o satadaci’ice:lay kako saan",
                "我是草原上最大的動物 我的力氣最大",
            ],
            [
                "秀姑巒",
                "kering sa masoni^ to ko pipahanhanan a tatokian o fe:soc no niyam a tayra i piondoan",
                "下課鐘一響 我們朝操場跑去",
            ],
            [
                "秀姑巒",
                "sa’icelaw ita aka ka tifahaw ko faloco’",
                "加油 不停的翻轉吧",
            ],
        ],
        "阿美": [
            [
                "南勢",
                "U payniyaru’ nu pangcah i matiya, u ina haw ku miterungay, mikadavu ku vavainay i vavahiyan a luma.",
                "阿美族的原始社會,是以女人為主的母系社會,男子授室入贅女家。",
            ],
            [
                "恆春",
                "O todong no cecayay a kitakit ko sa’osi to itiya:ay ho a kasaniyaro’.",
                "當時的部落如同一個國家的概念。",
            ],
            [
                "馬蘭",
                "O sata’angayay a pisanga’an to tilong ko Tafalong itiya ho, mapaliwal i kasaniyaroaro’ ko misatilongan to sakacaloway no finawlan i ’orip a lalosidan.",
                "而太巴塱部落則是當時最大的製造陶埸域,供應各部落族人日常生活的陶器用品。",
            ],
            [
                "秀姑巒",
                "ci ngangan ko Pangcah to Awa^, ’Afo^, Oning, Falah sanay a ngangan.",
                "所以阿美族有Awa^(一無所有)、’Afo^(碳灰)、Oning(污垢)、Falah(丟棄)……等這樣的名字。",
            ],
            [
                "海岸",
                "mikayat ko kawili kawanan a kamay to tatihi, masakawanan ko rakat a mitaliyok, lahoday ko piperok, mato’asay, o wawa ato lafang maemin mangaay a masakero.",
                "單純的手牽手,向右移動來繞圓圈,很輕鬆,老少咸宜全下場跳。",
            ],
        ],
        "賽德克": [
            [
                "德固達雅",
                "Netun so laqi tnqliyan de, asi ka mangal ngayan rrudan na seediq tnquli ka ngayan laqi tnqliyan.",
                "若是收養的子女,被收養子女的名字就要承傳收養者家族先人的名字。",
            ],
            [
                "德鹿谷",
                "Mnsuwil mangal hangan samac ni pnegalang uri.",
                "有時也以動植物命名。",
            ],
            [
                "都達",
                "so ana manu hhmaan Sediq u niqan balay snlhayan na.",
                "農耕行為極度神聖化。",
            ],
        ],
        "太魯閣": [
            [
                "",
                "Rudan Truku sexual o kmgaaw ptasan dqras kana, ida qtaan bi bitaq sayang ka rudan ptasan dqras.",
                "過去太魯閣族的耆老都是文面的,直到最近文面老人還能夠看得到。",
            ],
        ],
    }

    rows = rows_by_language.get(language)
    if rows is None:
        # No example rows are defined for this language.
        return None
    return gr.Dataset(component_props=props, samples=rows)


def get_title():
    """Return the page title taken from the first line of ``DEMO.md``.

    The line is stripped of surrounding whitespace (including the trailing
    newline that ``readline`` keeps) and then of leading/trailing ``#`` and
    space characters, so ``"# Title\\n"`` yields ``"Title"``.

    The file is read as UTF-8 explicitly so the result does not depend on the
    platform's default locale encoding.
    """
    with open("DEMO.md", encoding="utf-8") as tong:
        first_line = tong.readline()
    # strip() first: otherwise the trailing "\n" shields the right-hand side
    # from strip("# ") and ends up in the title.
    return first_line.strip().strip("# ")


# Top-level Blocks container for the whole page.  The title comes from
# DEMO.md; the theme's font stack starts with "tauhu-oo" (presumably provided
# by the stylesheet imported via ``css`` — confirm) followed by fallbacks.
_page_title = get_title()
_font_stack = (
    "tauhu-oo",
    gr.themes.GoogleFont("Source Sans Pro"),
    "ui-sans-serif",
    "system-ui",
    "sans-serif",
)
demo = gr.Blocks(
    title=_page_title,
    css="@import url(https://tauhu.tw/tauhu-oo.css);",
    theme=gr.themes.Default(font=_font_stack),
)

with demo:
    # The first configured model is preselected; its mappings seed the
    # initial widget state.
    default_model_id = next(iter(models_config))
    default_model_config = models_config[default_model_id]
    default_speaker_choices = list(default_model_config["speaker_mapping"].items())
    default_language_choices = list(
        default_model_config["language_mapping"].items()
    )

    model_drop_down = gr.Dropdown(
        list(models_config),
        value=default_model_id,
        label="模型",
    )
    # NOTE(review): initially always visible, whereas ``when_model_selected``
    # shows it only for models with a speaker encoder path — confirm intended.
    use_default_emb_or_custom_radio = gr.Radio(
        label="語者類型",
        choices=["預設語者", "客製化語者"],
        value="預設語者",
        visible=True,
        show_label=False,
    )
    speaker_wav = gr.Audio(
        label="客製化語音",
        visible=False,
        editable=False,
        type="filepath",
        waveform_options=gr.WaveformOptions(
            show_controls=False,
            sample_rate=16000,
        ),
    )
    speaker_drop_down = gr.Dropdown(
        choices=default_speaker_choices,
        # Models without any speaker are tolerated by the handlers, so the
        # initial value must not index into an empty mapping either.
        value=default_speaker_choices[0][1] if default_speaker_choices else None,
        label="語者",
        interactive=len(default_speaker_choices) > 1,
        visible=True,
    )
    use_default_emb_or_custom_radio.change(
        use_default_emb_or_custom_radio_input,
        inputs=[use_default_emb_or_custom_radio],
        outputs=[speaker_wav, speaker_drop_down],
    )

    default_language = default_language_choices[0][1]
    language_radio = gr.Radio(
        choices=default_language_choices,
        value=default_language,
        label="語言",
        interactive=len(default_language_choices) > 1,
    )

    # Dialect names are the part after the first underscore of every g2p key
    # that contains the default language.
    default_dialect_choices = [
        tag.split("_")[1] for tag in g2p.keys() if default_language in tag
    ]
    dialect_radio = gr.Radio(
        choices=default_dialect_choices,
        value=default_dialect_choices[0],
        label="方言",
        interactive=len(default_dialect_choices) > 1,
    )

    model_drop_down.change(
        when_model_selected,
        inputs=[model_drop_down],
        outputs=[speaker_drop_down, language_radio, use_default_emb_or_custom_radio],
    )

    input_text = gr.Textbox(
        label="輸入文字",
        value="",
    )

    speed = gr.Slider(maximum=1.5, minimum=0.5, value=1, label="語速")

    # Read as UTF-8 explicitly so rendering does not depend on the platform's
    # default locale encoding.
    with open("DEMO.md", encoding="utf-8") as tong:
        gr.Markdown(tong.read())
    gr.Interface(
        text_to_speech,
        inputs=[
            model_drop_down,
            use_default_emb_or_custom_radio,
            speaker_wav,
            speaker_drop_down,
            language_radio,
            dialect_radio,
            speed,
            input_text,
        ],
        outputs=[
            gr.Audio(interactive=False, label="合成語音", show_download_button=True),
        ],
        allow_flagging="auto",
    )

    # Hidden textbox that receives the Chinese translation column of an
    # example row.
    dummy_chinese_text = gr.Textbox(visible=False, label="中文")

    examples = gr.Examples(
        [
            [
                "秀姑巒",
                "wa:tah ko fana’ iso akong",
                "阿公好厲害",
            ],
            [
                "秀姑巒",
                "romakat kako a talapicodadan to romi’ami’ad",
                "我每天都去學校上課",
            ],
            [
                "秀姑巒",
                "o satata’akay kako itini i palapalaan o satadaci’ice:lay kako saan",
                "我是草原上最大的動物 我的力氣最大",
            ],
            [
                "秀姑巒",
                "kering sa masoni^ to ko pipahanhanan a tatokian o fe:soc no niyam a tayra i piondoan",
                "下課鐘一響 我們朝操場跑去",
            ],
            [
                "秀姑巒",
                "sa’icelaw ita aka ka tifahaw ko faloco’",
                "加油 不停的翻轉吧",
            ],
        ],
        label="範例",
        inputs=[dialect_radio, input_text, dummy_chinese_text],
        cache_examples=False,
    )
    # On language change: first refresh the dialect radio, then the example
    # rows.
    language_radio.change(
        language_radio_changed, inputs=[language_radio], outputs=[dialect_radio]
    ).then(update_example, inputs=[language_radio], outputs=[examples.dataset])

# Launch the assembled UI; this runs unconditionally at module level.
demo.launch()