Spaces:
Sleeping
Sleeping
Commit
·
3cfc715
1
Parent(s):
5740c9e
fix: example don't work
Browse files- app.py +90 -48
- ipa/ipa.py +2 -1
app.py
CHANGED
@@ -108,7 +108,9 @@ def use_default_emb_or_custom_radio_input(use_default_emb_or_custom):
|
|
108 |
def language_radio_changed(language):
|
109 |
if language in g2p:
|
110 |
return gr.update(visible=False)
|
111 |
-
dialect_choices = [
|
|
|
|
|
112 |
return gr.update(
|
113 |
choices=dialect_choices,
|
114 |
value=dialect_choices[0],
|
@@ -117,6 +119,83 @@ def language_radio_changed(language):
|
|
117 |
)
|
118 |
|
119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
demo = gr.Blocks(
|
121 |
title="臺灣南島語語音合成系統",
|
122 |
css="@import url(https://tauhu.tw/tauhu-oo.css);",
|
@@ -194,11 +273,7 @@ with demo:
|
|
194 |
interactive=len(default_dialect_choices) > 1,
|
195 |
)
|
196 |
|
197 |
-
|
198 |
-
language_radio_changed, inputs=[language_radio], outputs=[dialect_radio]
|
199 |
-
)
|
200 |
-
|
201 |
-
model_drop_down.input(
|
202 |
when_model_selected,
|
203 |
inputs=[model_drop_down],
|
204 |
outputs=[speaker_drop_down, language_radio, use_default_emb_or_custom_radio],
|
@@ -230,76 +305,43 @@ with demo:
|
|
230 |
],
|
231 |
allow_flagging="auto",
|
232 |
)
|
233 |
-
|
|
|
|
|
|
|
234 |
[
|
235 |
[
|
236 |
-
"阿美",
|
237 |
"南勢",
|
238 |
"U payniyaru’ nu pangcah i matiya, u ina haw ku miterungay, mikadavu ku vavainay i vavahiyan a luma.",
|
239 |
"阿美族的原始社會,是以女人為主的母系社會,男子授室入贅女家。",
|
240 |
],
|
241 |
[
|
242 |
-
"阿美",
|
243 |
"恆春",
|
244 |
"O todong no cecayay a kitakit ko sa’osi to itiya:ay ho a kasaniyaro’.",
|
245 |
"當時的部落如同一個國家的概念。",
|
246 |
],
|
247 |
[
|
248 |
-
"阿美",
|
249 |
"馬蘭",
|
250 |
"O sata’angayay a pisanga’an to tilong ko Tafalong itiya ho, mapaliwal i kasaniyaroaro’ ko misatilongan to sakacaloway no finawlan i ’orip a lalosidan.",
|
251 |
"而太巴塱部落則是當時最大的製造陶埸域,供應各部落族人日常生活的陶器用品。",
|
252 |
],
|
253 |
[
|
254 |
-
"阿美",
|
255 |
"秀姑巒",
|
256 |
"ci ngangan ko Pangcah to Awa^, ’Afo^, Oning, Falah sanay a ngangan.",
|
257 |
"所以阿美族有Awa^(一無所有)、’Afo^(碳灰)、Oning(污垢)、Falah(丟棄)……等這樣的名字。",
|
258 |
],
|
259 |
[
|
260 |
-
"阿美",
|
261 |
"海岸",
|
262 |
"mikayat ko kawili kawanan a kamay to tatihi, masakawanan ko rakat a mitaliyok, lahoday ko piperok, mato’asay, o wawa ato lafang maemin mangaay a masakero.",
|
263 |
"單純的手牽手,向右移動來繞圓圈,很輕鬆,老少咸宜全下場跳。",
|
264 |
],
|
265 |
-
[
|
266 |
-
"賽德克",
|
267 |
-
"德固達雅",
|
268 |
-
"Netun so laqi tnqliyan de, asi ka mangal ngayan rrudan na seediq tnquli ka ngayan laqi tnqliyan.",
|
269 |
-
"若是收養的子女,被收養子女的名字就要承傳收養者家族先人的名字。",
|
270 |
-
],
|
271 |
-
[
|
272 |
-
"賽德克",
|
273 |
-
"德鹿谷",
|
274 |
-
"Mnsuwil mangal hangan samac ni pnegalang uri.",
|
275 |
-
"有時也以動植物命名。",
|
276 |
-
],
|
277 |
-
[
|
278 |
-
"賽德克",
|
279 |
-
"都達",
|
280 |
-
"so ana manu hhmaan Sediq u niqan balay snlhayan na.",
|
281 |
-
"農耕行為極度神聖化。",
|
282 |
-
],
|
283 |
-
[
|
284 |
-
"太魯閣",
|
285 |
-
None,
|
286 |
-
"Rudan Truku sexual o kmgaaw ptasan dqras kana, ida qtaan bi bitaq sayang ka rudan ptasan dqras.",
|
287 |
-
"過去太魯閣族的耆老都是文面的,直到最近文面老人還能夠看得到。",
|
288 |
-
],
|
289 |
-
[
|
290 |
-
"阿美",
|
291 |
-
"秀姑巒",
|
292 |
-
"ci ngangan ko Pangcah to Awa^, ’Afo^, Oning, Falah sanay a ngangan.",
|
293 |
-
"所以阿美族有Awa^(一無所有)、’Afo^(碳灰)、Oning(污垢)、Falah(丟棄)……等這樣的名字。",
|
294 |
-
],
|
295 |
],
|
296 |
label="範例",
|
297 |
-
inputs=[
|
298 |
-
|
299 |
-
dialect_radio,
|
300 |
-
input_text,
|
301 |
-
gr.Textbox(visible=False),
|
302 |
-
],
|
303 |
)
|
|
|
|
|
|
|
304 |
|
305 |
demo.launch()
|
|
|
108 |
def language_radio_changed(language):
|
109 |
if language in g2p:
|
110 |
return gr.update(visible=False)
|
111 |
+
dialect_choices = [
|
112 |
+
(tag.split("_")[1], tag.split("_")[1]) for tag in g2p.keys() if language in tag
|
113 |
+
]
|
114 |
return gr.update(
|
115 |
choices=dialect_choices,
|
116 |
value=dialect_choices[0],
|
|
|
119 |
)
|
120 |
|
121 |
|
122 |
+
def update_example(language):
    """Build the replacement examples dataset for the selected language.

    The first entry of the dataset's component props is hidden when
    ``language`` is itself a key of ``g2p``; otherwise it is shown and its
    choices are rebuilt from every ``g2p`` key that contains ``language``.
    (Presumably that first entry describes the dialect radio, since the
    examples are declared with the dialect radio as their first input —
    confirm against the ``gr.Examples`` call.)

    Args:
        language: Language name chosen in the language radio.

    Returns:
        A ``gr.Dataset`` carrying the adjusted component props and the sample
        rows for ``language``, or ``None`` when no samples are defined for it.
    """
    import copy

    # Sample rows per language: [dialect, source text, Chinese translation].
    samples_by_language = {
        "阿美": [
            [
                "南勢",
                "U payniyaru’ nu pangcah i matiya, u ina haw ku miterungay, mikadavu ku vavainay i vavahiyan a luma.",
                "阿美族的原始社會,是以女人為主的母系社會,男子授室入贅女家。",
            ],
            [
                "恆春",
                "O todong no cecayay a kitakit ko sa’osi to itiya:ay ho a kasaniyaro’.",
                "當時的部落如同一個國家的概念。",
            ],
            [
                "馬蘭",
                "O sata’angayay a pisanga’an to tilong ko Tafalong itiya ho, mapaliwal i kasaniyaroaro’ ko misatilongan to sakacaloway no finawlan i ’orip a lalosidan.",
                "而太巴塱部落則是當時最大的製造陶埸域,供應各部落族人日常生活的陶器用品。",
            ],
            [
                "秀姑巒",
                "ci ngangan ko Pangcah to Awa^, ’Afo^, Oning, Falah sanay a ngangan.",
                "所以阿美族有Awa^(一無所有)、’Afo^(碳灰)、Oning(污垢)、Falah(丟棄)……等這樣的名字。",
            ],
            [
                "海岸",
                "mikayat ko kawili kawanan a kamay to tatihi, masakawanan ko rakat a mitaliyok, lahoday ko piperok, mato’asay, o wawa ato lafang maemin mangaay a masakero.",
                "單純的手牽手,向右移動來繞圓圈,很輕鬆,老少咸宜全下場跳。",
            ],
        ],
        "賽德克": [
            [
                "德固達雅",
                "Netun so laqi tnqliyan de, asi ka mangal ngayan rrudan na seediq tnquli ka ngayan laqi tnqliyan.",
                "若是收養的子女,被收養子女的名字就要承傳收養者家族先人的名字。",
            ],
            [
                "德鹿谷",
                "Mnsuwil mangal hangan samac ni pnegalang uri.",
                "有時也以動植物命名。",
            ],
            [
                "都達",
                "so ana manu hhmaan Sediq u niqan balay snlhayan na.",
                "農耕行為極度神聖化。",
            ],
        ],
        "太魯閣": [
            [
                "",
                "Rudan Truku sexual o kmgaaw ptasan dqras kana, ida qtaan bi bitaq sayang ka rudan ptasan dqras.",
                "過去太魯閣族的耆老都是文面的,直到最近文面老人還能夠看得到。",
            ],
        ],
    }

    # Work on a private copy: ``examples.dataset`` is a single module-level
    # object, so editing its props in place would carry one call's
    # visibility/choices over into every later call.
    component_props = copy.deepcopy(examples.dataset.component_props)
    first_props = component_props[0]

    if language in g2p:
        first_props["visible"] = False
    else:
        first_props["visible"] = True
        # The part after the first underscore of each matching key is used as
        # both the label and the value of the choice.
        dialects = [tag.split("_")[1] for tag in g2p if language in tag]
        first_props["choices"] = [(dialect, dialect) for dialect in dialects]

    samples = samples_by_language.get(language)
    if samples is None:
        # No samples defined for this language: keep the original behaviour of
        # returning None rather than a new dataset.
        # NOTE(review): confirm how the installed Gradio version treats a None
        # return for a Dataset output.
        return None

    return gr.Dataset(component_props=component_props, samples=samples)
|
197 |
+
|
198 |
+
|
199 |
demo = gr.Blocks(
|
200 |
title="臺灣南島語語音合成系統",
|
201 |
css="@import url(https://tauhu.tw/tauhu-oo.css);",
|
|
|
273 |
interactive=len(default_dialect_choices) > 1,
|
274 |
)
|
275 |
|
276 |
+
model_drop_down.change(
|
|
|
|
|
|
|
|
|
277 |
when_model_selected,
|
278 |
inputs=[model_drop_down],
|
279 |
outputs=[speaker_drop_down, language_radio, use_default_emb_or_custom_radio],
|
|
|
305 |
],
|
306 |
allow_flagging="auto",
|
307 |
)
|
308 |
+
|
309 |
+
dummy_chinese_text = gr.Textbox(visible=False, label="中文")
|
310 |
+
|
311 |
+
examples = gr.Examples(
|
312 |
[
|
313 |
[
|
|
|
314 |
"南勢",
|
315 |
"U payniyaru’ nu pangcah i matiya, u ina haw ku miterungay, mikadavu ku vavainay i vavahiyan a luma.",
|
316 |
"阿美族的原始社會,是以女人為主的母系社會,男子授室入贅女家。",
|
317 |
],
|
318 |
[
|
|
|
319 |
"恆春",
|
320 |
"O todong no cecayay a kitakit ko sa’osi to itiya:ay ho a kasaniyaro’.",
|
321 |
"當時的部落如同一個國家的概念。",
|
322 |
],
|
323 |
[
|
|
|
324 |
"馬蘭",
|
325 |
"O sata’angayay a pisanga’an to tilong ko Tafalong itiya ho, mapaliwal i kasaniyaroaro’ ko misatilongan to sakacaloway no finawlan i ’orip a lalosidan.",
|
326 |
"而太巴塱部落則是當時最大的製造陶埸域,供應各部落族人日常生活的陶器用品。",
|
327 |
],
|
328 |
[
|
|
|
329 |
"秀姑巒",
|
330 |
"ci ngangan ko Pangcah to Awa^, ’Afo^, Oning, Falah sanay a ngangan.",
|
331 |
"所以阿美族有Awa^(一無所有)、’Afo^(碳灰)、Oning(污垢)、Falah(丟棄)……等這樣的名字。",
|
332 |
],
|
333 |
[
|
|
|
334 |
"海岸",
|
335 |
"mikayat ko kawili kawanan a kamay to tatihi, masakawanan ko rakat a mitaliyok, lahoday ko piperok, mato’asay, o wawa ato lafang maemin mangaay a masakero.",
|
336 |
"單純的手牽手,向右移動來繞圓圈,很輕鬆,老少咸宜全下場跳。",
|
337 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
],
|
339 |
label="範例",
|
340 |
+
inputs=[dialect_radio, input_text, dummy_chinese_text],
|
341 |
+
cache_examples=False,
|
|
|
|
|
|
|
|
|
342 |
)
|
343 |
+
language_radio.change(
|
344 |
+
language_radio_changed, inputs=[language_radio], outputs=[dialect_radio]
|
345 |
+
).then(update_example, inputs=[language_radio], outputs=[examples.dataset])
|
346 |
|
347 |
demo.launch()
|
ipa/ipa.py
CHANGED
@@ -18,7 +18,7 @@ def can_form_string(x, symbol_dict):
|
|
18 |
|
19 |
def text_to_ipa(text, lang_tag, g2p):
|
20 |
ipa = []
|
21 |
-
words = text.split()
|
22 |
|
23 |
print(words)
|
24 |
|
@@ -27,6 +27,7 @@ def text_to_ipa(text, lang_tag, g2p):
|
|
27 |
result, matched_parts = can_form_string(word, g2p[lang_tag])
|
28 |
|
29 |
if result is False:
|
|
|
30 |
return ""
|
31 |
|
32 |
for matched_part in matched_parts:
|
|
|
18 |
|
19 |
def text_to_ipa(text, lang_tag, g2p):
|
20 |
ipa = []
|
21 |
+
words = text.lower().replace(".", "").split() # change in future
|
22 |
|
23 |
print(words)
|
24 |
|
|
|
27 |
result, matched_parts = can_form_string(word, g2p[lang_tag])
|
28 |
|
29 |
if result is False:
|
30 |
+
print(word)
|
31 |
return ""
|
32 |
|
33 |
for matched_part in matched_parts:
|