File size: 3,073 Bytes
ea9036c
 
 
7d0bff2
201db01
7d0bff2
 
201db01
7d0bff2
 
 
 
201db01
7d0bff2
 
201db01
 
7d0bff2
 
 
 
ea9036c
ebe9cbe
 
ea9036c
 
 
 
 
 
 
 
 
 
7d0bff2
201db01
 
 
 
7d0bff2
 
 
 
 
 
 
 
ebe9cbe
 
 
 
 
 
201db01
 
 
 
7d0bff2
 
 
 
 
 
ebe9cbe
7d0bff2
 
 
 
ebe9cbe
 
 
7d0bff2
 
ea9036c
 
ebe9cbe
201db01
 
 
ebe9cbe
7d0bff2
201db01
ea9036c
 
 
 
7d0bff2
ea9036c
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from subprocess import call
import gradio as gr
import os
from TTS.api import TTS
from TTS.utils.synthesizer import Synthesizer

# Optional helper (disabled): list the available 🐸TTS models and pick the
# first one instead of the hard-coded model name used below.
# all_models = TTS.list_models()
# for model in all_models:
#     print(model)

# print("Using model: ", all_models[0])
# model = all_models[0]
# Init TTS

# Upper bound on the number of characters synthesized per request;
# inference() truncates longer input to this length.
MAX_TXT_LEN = 500


# The extra '' argument makes print() emit a trailing space after the message.
print("Downloading model...", '')

# Single module-level TTS instance, created once at import time and used by
# inference() for every request.
# NOTE(review): presumably this fetches the model weights on first run (hence
# the message above) — confirm against the TTS library.
voiceCloneModel = TTS('tts_models/multilingual/multi-dataset/your_tts')

# Alternative model (disabled).
# tts = TTS("tts_models/de/thorsten/tacotron2-DDC")


def run_cmd(command):
    """Run an external command, echoing it first, and wait for it to finish.

    Args:
        command: Program and arguments, passed unchanged to
            ``subprocess.call`` (normally a list of strings).

    Returns:
        The exit status reported by the command.

    Raises:
        SystemExit: With status 1 if the wait is interrupted by Ctrl-C.
    """
    # ``sys`` is imported here because it is not among the file-level imports;
    # without it the interrupt handler below would fail with NameError.
    import sys

    try:
        print(command)
        # Hand the exit status back so callers can detect a failed command.
        return call(command)
    except KeyboardInterrupt:
        print("Process interrupted")
        sys.exit(1)


# Dropdown label -> reference recording passed to the model as ``speaker_wav``.
_SPEAKER_WAVS = {
    'Speaker-1': 'input/amitabh.mp3',
    'Speaker-2': 'input/amrish.mp3',
    'Speaker-3': 'input/obama.mp3',
    'Speaker-4': 'input/trump.wav',
    'Rock': 'input/sample/Dwayne-Johnson-sample.wav',
    'Elon': 'input/sample/Elon-Sample.wav',
    'Obama': 'input/sample/Obama-sample.wav',
    'Tony': 'input/sample/tony-stark-2.wav',
    'Madara Uchiha': 'input/sample/Madara.mp3',
}
# Recording used when the label is missing or not recognized.
_DEFAULT_SPEAKER_WAV = 'input/z-default.wav'
# Every request writes to (and returns) this same file.
_OUTPUT_WAV = 'output/output.wav'


def inference(text, speaker):
    """Synthesize ``text`` using the reference recording selected by ``speaker``.

    Args:
        text: Text to speak. Anything beyond ``MAX_TXT_LEN`` characters is
            dropped and a notice is printed.
        speaker: Speaker label as offered in the UI dropdown. Unknown labels
            (including ``None``) fall back to the default recording.

    Returns:
        Path of the WAV file written by the model (``output/output.wav``).
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(
            f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")

    speaker_wav = _SPEAKER_WAVS.get(speaker, _DEFAULT_SPEAKER_WAV)

    # Create the output directory up front so the write cannot fail just
    # because the folder is missing (e.g. on a fresh checkout).
    os.makedirs(os.path.dirname(_OUTPUT_WAV), exist_ok=True)

    voiceCloneModel.tts_to_file(text, speaker_wav=speaker_wav,
                                language="en", file_path=_OUTPUT_WAV)
    return _OUTPUT_WAV


# Speaker labels offered in the dropdown; inference() translates each label
# into a reference recording.
sampleInput = ['Speaker-1', 'Speaker-2', 'Speaker-3',
               'Speaker-4', 'Rock', 'Elon', 'Obama', 'Tony', 'Madara Uchiha']
# Input components, in the same order as inference()'s parameters
# (text, speaker).
inputs = [gr.Textbox(lines=5, label="Input Text"),
          gr.Dropdown(
              # ``value`` sets the initial selection; the top-level Gradio
              # components do not take a ``default`` argument.
              sampleInput, label="Sample Input", value="Speaker-1",)
          ]
# inference() returns a file path, so the audio component is configured to
# work with file paths.
outputs = gr.Audio(type="filepath", label="Output Audio")
title = "Text To Speech"
description = "An example of using TTS to generate speech from text."
article = ""
# Each example row supplies only the text input.
examples = [
    ["This is an open-source library that generates synthetic speech"]
]
# Build the interface and start serving it immediately.
# NOTE(review): ``verbose`` and ``enable_queue`` appear to be deprecated in
# recent Gradio releases — confirm against the installed version.
gr.Interface(
    inference,
    inputs,
    outputs,
    verbose=True,
    title=title,
    description=description,
    article=article,
    examples=examples,
    enable_queue=True,
    allow_flagging="never",

).launch(debug=True)