Archan committed on
Commit bbdb87d
1 Parent(s): 0950ce1

Update tts.py

Files changed (1)
  1. tts.py +31 -14
tts.py CHANGED
@@ -1,18 +1,35 @@
- from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
- from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
-
- models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
-     "facebook/fastspeech2-en-ljspeech",
-     arg_overrides={"vocoder": "hifigan", "fp16": False}
- )
- model = models
- TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
- generator = task.build_generator(model, cfg)
-
- def tts(text):
-     print("Converting to TTS")
-     sample = TTSHubInterface.get_model_input(task, text)
-     wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
-     return wave, rate
+ import time
+ import torch
+ import scipy.io.wavfile
+ from espnet2.bin.tts_inference import Text2Speech
+ from espnet2.utils.types import str_or_none
+
+ tagen = 'kan-bayashi/ljspeech_vits'
+ vocoder_tagen = "none"
+
+ text2speechen = Text2Speech.from_pretrained(
+     model_tag=str_or_none(tagen),
+     vocoder_tag=str_or_none(vocoder_tagen),
+     device="cpu",
+     # Only for Tacotron 2 & Transformer
+     threshold=0.5,
+     # Only for Tacotron 2
+     minlenratio=0.0,
+     maxlenratio=10.0,
+     use_att_constraint=False,
+     backward_window=1,
+     forward_window=3,
+     # Only for FastSpeech & FastSpeech2 & VITS
+     speed_control_alpha=1.0,
+     # Only for VITS
+     noise_scale=0.333,
+     noise_scale_dur=0.333,
+ )
+
+ def inference(text, lang):
+     with torch.no_grad():
+         if lang == "english":
+             wav = text2speechen(text)["wav"]
+             scipy.io.wavfile.write("./audio/out.wav", text2speechen.fs, wav.view(-1).cpu().numpy())
+     return "./audio/out.wav"
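For context, a minimal usage sketch of the updated helper. It assumes tts.py is importable as a module named tts and that an ./audio output directory can be created; neither assumption is part of this commit.

import os
from tts import inference  # assumption: tts.py is on the import path; importing it loads the VITS model

# The new inference() synthesizes English speech with the pretrained VITS model
# and writes the result to ./audio/out.wav, returning that path.
os.makedirs("./audio", exist_ok=True)
path = inference("Hello from the new VITS-based pipeline.", "english")
print("Synthesized audio written to", path)

Note that only the "english" branch synthesizes audio in this commit; the returned path is hard-coded to ./audio/out.wav.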