Spaces:

tobiccino
/

tts

Sleeping

App Files Community

tobiccino committed on Feb 22, 2023

Commit

320b21c

•

1 Parent(s): 4acb066

add stop duration option

Browse files

Files changed (4) hide show

app.py +93 -8
requirements.txt +2 -1
vietTTS/hifigan/mel2wave.py +0 -4
vietTTS/nat/text2mel.py +0 -6

app.py CHANGED Viewed

@@ -6,33 +6,118 @@ import gradio as gr
 import os
-def text_to_speech(text,stop_duration_text):
     print("starting")
     # prevent too long text
     if len(text) > 500:
         text = text[:500]
     # stop_duration_float = float(stop_duration_text)
     text = nat_normalize_text(text)
     mel = text2mel(
         text,
         "lexicon.txt",
-        stop_duration_text,
         "acoustic_latest_ckpt.pickle",
         "duration_latest_ckpt.pickle",
     )
-    print("mel")
-    print(mel)
     wave = mel2wave(mel, "config.json", "hk_hifi.pickle")
-    print("wave")
-    print(wave)
     return (wave * (2**15)).astype(np.int16)
-def speak(text,stop_duration_text):
-    y = text_to_speech(text,stop_duration_text)
     return 16_000, y
 title = "SLT TTS"
 description = "SLT Vietnamese Text to speech demo."

 import os
+def text_to_speech(text,stop_duration):
     print("starting")
     # prevent too long text
     if len(text) > 500:
         text = text[:500]
     # stop_duration_float = float(stop_duration_text)
+    text = clean_text(text)
     text = nat_normalize_text(text)
     mel = text2mel(
         text,
         "lexicon.txt",
+        stop_duration,
         "acoustic_latest_ckpt.pickle",
         "duration_latest_ckpt.pickle",
     )
     wave = mel2wave(mel, "config.json", "hk_hifi.pickle")
     return (wave * (2**15)).astype(np.int16)
+def speak(text,stop_duration):
+    y = text_to_speech(text,stop_duration)
     return 16_000, y
+def clean_text(test_string):
+  list_word = test_string.split()
+  # print(list_word)
+  regex = r"\d{2}(?P<sep>[-/])\d{1,2}(?P=sep)\d{4}"
+  for word in list_word :
+    try:
+      # print(word)
+      searchbox_result = re.match(regex, word)
+      day = searchbox_result.group(0)
+      day2 = day
+      day2 = day2.replace('/',' ').replace('-',' ')
+      list_date = day2.split(' ')
+      date_result = 'Ngày ' + n2w(list_date[0]) + ' tháng ' + n2w(list_date[1].replace('0','') if list_date[1].startswith('0') else list_date[1]) + ' năm ' + n2w(list_date[2])
+      # print(date_result)
+      test_string = test_string.replace(word, date_result)
+    except AttributeError:
+      # print(word)
+      # print("can't make a group")
+      continue
+  # print(test_string)
+  regex2 = r"\d{2}(?P<sep>[-/])\d{1,2}"
+  for word in list_word :
+    try:
+      # print(word)
+      searchbox_result = re.match(regex2, word)
+      day = searchbox_result.group(0)
+      day2 = day
+      day2 = day2.replace('/',' ').replace('-',' ')
+      list_date = day2.split(' ')
+      date_result = 'Ngày ' + n2w(list_date[0]) + ' tháng ' + n2w(list_date[1].replace('0','') if list_date[1].startswith('0') else list_date[1])
+      # print(date_result)
+      test_string = test_string.replace(word, date_result)
+    except AttributeError:
+      # print(word)
+      # print("can't make a group")
+      continue
+  # print(test_string)
+  regex3 = r"\d{1,2}(?P<sep>[h:])\d{1,2}"
+  for word in list_word :
+    try:
+      # print(word)
+      searchbox_result = re.match(regex3, word)
+      day = searchbox_result.group(0)
+      day2 = day
+      day2 = day2.replace('h',' ').replace(':',' ')
+      list_date = day2.split(' ')
+      date_result = n2w(list_date[0]) + ' giờ ' + n2w(list_date[1].replace('0','') if list_date[1].startswith('0') else list_date[1]) + ' phút '
+      # print(date_result)
+      test_string = test_string.replace(word, date_result)
+    except AttributeError:
+      # print(word)
+      # print("can't make a group")
+      continue
+  print(test_string)
+  for word in list_word :
+    try:
+      if word.isdigit() :
+        # print(word)
+        text_result = n2w_single(word)
+        # print(text_result)
+        test_string = test_string.replace(word, text_result, 1)
+    except AttributeError:
+      # print(word)
+      print("can't make a group")
+      continue
+  return test_string
 title = "SLT TTS"
 description = "SLT Vietnamese Text to speech demo."

requirements.txt CHANGED Viewed

@@ -10,4 +10,5 @@ tabulate
 textgrid@ git+https://github.com/kylebgorman/textgrid.git
 tqdm
 matplotlib
-gradio==3.19.0

 textgrid@ git+https://github.com/kylebgorman/textgrid.git
 tqdm
 matplotlib
+noisereduce
+vietnam_number

vietTTS/hifigan/mel2wave.py CHANGED Viewed

@@ -37,10 +37,6 @@ def mel2wave(
     aux = {}
     wav, aux = forward.apply(params, aux, rng, mel)
     wav = jnp.squeeze(wav)
-    print("wav : ")
-    print(wav)
     jax.config.update('jax_platform_name', 'cpu')
     audio = jax.device_get(wav)
-    print("audio : ")
-    print(audio)
     return audio

     aux = {}
     wav, aux = forward.apply(params, aux, rng, mel)
     wav = jnp.squeeze(wav)
     jax.config.update('jax_platform_name', 'cpu')
     audio = jax.device_get(wav)
     return audio

vietTTS/nat/text2mel.py CHANGED Viewed

@@ -100,13 +100,7 @@ def text2mel(
     durations = jnp.where(
         np.array(tokens)[None, :] == FLAGS.word_end_index, 0.0, durations
     )
-    print("acoustic_ckpt : ")
-    print(acoustic_ckpt)
-    print("duration_ckpt : ")
-    print(duration_ckpt)
     mels = predict_mel(tokens, durations, acoustic_ckpt)
-    print("mels : ")
-    print(mels)
     if tokens[-1] == FLAGS.sil_index:
         end_silence = durations[0, -1].item()
         silence_frame = int(end_silence * FLAGS.sample_rate / (FLAGS.n_fft // 4))

     durations = jnp.where(
         np.array(tokens)[None, :] == FLAGS.word_end_index, 0.0, durations
     )
     mels = predict_mel(tokens, durations, acoustic_ckpt)
     if tokens[-1] == FLAGS.sil_index:
         end_silence = durations[0, -1].item()
         silence_frame = int(end_silence * FLAGS.sample_rate / (FLAGS.n_fft // 4))