Spaces:

rrg92
/

xtts

Running on Zero

App Files Community

Tonic committed 9 days ago

Commit

f4158c0

verified ·

1 Parent(s): ac9a77d

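fixes 500 error for some users

Note: although the diff below lists every line of app.py as removed and re-added, the only functional change visible in it is at the very end of the file: the `demo.launch(...)` call gains the argument `ssr_mode=False` (and `allowed_paths=[ZIP_DIR]` gains a trailing comma). The remaining lines appear to be re-added with the same content.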

Browse files

![image.png](https://cdn-uploads.huggingface.co/production/uploads/62a3bb1cd0d8c2c2169f0b88/Vtx9Pq0tfbEaNam9xU5ra.png)

Files changed (1) hide show

app.py +447 -446

app.py CHANGED Viewed

@@ -1,446 +1,447 @@
-import gradio as gr
-import base64
-import tempfile
-import json
-import os
-from os.path import abspath
-import zipfile
-import random
-import xtts
-import re
-DO_CHECK = os.getenv('DO_CHECK', '1')
-OUTPUT = "./demo_outputs"
-cloned_speakers = {}
-print("Preparing file structure...")
-if not os.path.exists(OUTPUT):
-    os.mkdir(OUTPUT)
-    os.mkdir(os.path.join(OUTPUT, "cloned_speakers"))
-    os.mkdir(os.path.join(OUTPUT, "generated_audios"))
-elif os.path.exists(os.path.join(OUTPUT, "cloned_speakers")):
-    print("Loading existing cloned speakers...")
-    for file in os.listdir(os.path.join(OUTPUT, "cloned_speakers")):
-        if file.endswith(".json"):
-            with open(os.path.join(OUTPUT, "cloned_speakers", file), "r") as fp:
-                cloned_speakers[file[:-5]] = json.load(fp)
-    print("Available cloned speakers:", ", ".join(cloned_speakers.keys()))
-AUDIOS_DIR = os.path.join("demo_outputs", "generated_audios");
-ZIP_DIR = os.path.join("zip_outputs");
-print("Checking zip at", ZIP_DIR)
-if not os.path.exists(ZIP_DIR):
-    os.mkdir(ZIP_DIR)
-try:
-    print("Getting metadata from server ...")
-    LANUGAGES = xtts.get_languages()
-    print("Available languages:", ", ".join(LANUGAGES))
-    STUDIO_SPEAKERS = xtts.get_speakers()
-    print("Available studio speakers:", ", ".join(STUDIO_SPEAKERS.keys()))
-except:
-    raise Exception("Please make sure the server is running first.")
-def ExtractVars(input_string):
-    # Split the string into lines
-    lines = input_string.split('\n')
-    # Initialize an empty dictionary to store key-value pairs
-    result_dict = {
-         'prefix': None,
-         'name': '',
-         'speaker': None,
-         'num': None,
-    }
-    # List to hold lines that do not start with '!'
-    filtered_lines = []
-    # Iterate through each line
-    for line in lines:
-        # Check if the line starts with '!'
-        if line.strip().startswith('!'):
-            # Try to split the line into key and value parts
-            try:
-                # Split on '=' and strip whitespace from key and value
-                key, value = line.strip()[1:].split('=')
-                key = key.strip()
-                value = value.strip()
-                # Add to dictionary
-                result_dict[key] = value
-            except ValueError:
-                # Handle the case where there is no '=' or improper format
-                continue
-        elif len(line.strip()) > 0:
-            # Add the line to filtered_lines if it doesn't start with '!'
-            filtered_lines.append(line)
-    # Join the filtered lines back into a single string
-    filtered_string = '\n'.join(filtered_lines)
-    return result_dict, filtered_string
-def ParsePronucs(PronuncStr):
-    # Split the string into lines
-    lines = PronuncStr.split('\n')
-    # Initialize an empty dictionary to store key-value pairs
-    PronuncWords =  []
-    # Iterate through each line
-    for line in lines:
-        if len(line.strip()) > 0:
-            word,*text = line.strip().split('=',1)
-            word = word.strip()
-            text,*opts = text[0].split("|",1);
-            text = text.strip();
-            if len(opts) > 0:
-                opts = opts[0].strip().split(",");
-            else:
-                opts = [];
-            PronuncWords.append({'word':word, 'text':text, 'opts':opts})
-    return PronuncWords
-def FindSpeakerByName(name, speakerType):
-    srcItems = STUDIO_SPEAKERS if speakerType == "Studio" else cloned_speakers;
-    for key, value in srcItems.items():
-        if key == name:
-            return key,value
-        if key.split(" ")[0] == name:
-            return key,value;
-def clone_speaker(upload_file, clone_speaker_name, cloned_speaker_names):
-    embeddings =  xtts.predict_speaker(upload_file)
-    with open(os.path.join(OUTPUT, "cloned_speakers", clone_speaker_name + ".json"), "w") as fp:
-        json.dump(embeddings, fp)
-    cloned_speakers[clone_speaker_name] = embeddings
-    cloned_speaker_names.append(clone_speaker_name)
-    return upload_file, clone_speaker_name, cloned_speaker_names, gr.Dropdown(choices=cloned_speaker_names)
-def tts(text, pronunc, speaker_type, speaker_name_studio, speaker_name_custom, lang, temperature
-    ,speed,top_p,top_k, AllFileList,progress=gr.Progress()
-):
-    embeddings = STUDIO_SPEAKERS[speaker_name_studio] if speaker_type == 'Studio' else cloned_speakers[speaker_name_custom]
-    # break at line!
-    lines = text.split("---");
-    totalLines = len(lines);
-    print("Total parts:", len(lines))
-    audioNum = 0;
-    DefaultPrefix = next(tempfile._get_candidate_names());
-    CurrentPrefix = DefaultPrefix
-    # break pronuc
-    Pronuncs = ParsePronucs(pronunc)
-    AudioList = [];
-    for line in progress.tqdm(lines, desc="Gerando fala..."):
-        audioNum += 1;
-        textVars,cleanLine = ExtractVars(line)
-        if textVars['prefix']:
-            CurrentPrefix = textVars['prefix']
-        audioName = textVars['name'];
-        if audioName:
-            audioName = '_'+audioName
-        num = textVars['num'];
-        if not num:
-            num = audioNum;
-        path = CurrentPrefix +"_n_" + str(num)+audioName+".wav"
-        print("Generating audio for line", num, 'sequence', audioNum);
-        speaker = textVars['speaker'];
-        if not speaker:
-            speaker = speaker_name_studio if speaker_type == 'Studio' else speaker_name_custom
-        speakerName,embeddings = FindSpeakerByName(speaker, speaker_type)
-        if not speakerName:
-             raise ValueError("InvalidSpeaker: "+speakerName)
-        FixedText = cleanLine;
-        for pronunc in Pronuncs:
-            word = pronunc['word']
-            text = pronunc['text']
-            opts = pronunc['opts'];
-            flg = re.IGNORECASE
-            if 'cs' in opts:
-                flg = 0;
-            FixedText = re.sub(f'\\b{word}\\b', text, FixedText, flags=flg)
-        ipts = xtts.TTSInputs(
-            speaker_embedding=embeddings["speaker_embedding"],
-            gpt_cond_latent=embeddings["gpt_cond_latent"],
-            text=FixedText,
-            language=lang,
-            temperature=temperature,
-            speed=speed,
-            top_k=top_k,
-            top_p=top_p
-        )
-        generated_audio = xtts.predict_speech(ipts)
-        print("Audio generated.. Saving to", path);
-        generated_audio_path = os.path.join(AUDIOS_DIR, path)
-        with open(generated_audio_path, "wb") as fp:
-            fp.write(base64.b64decode(generated_audio))
-            AudioList.append(fp.name);
-    AllFileList.clear();
-    AllFileList.extend(AudioList);
-    return gr.Dropdown(
-            label="Generated Audios",
-            choices=list(AudioList),
-            value=AudioList[0]
-        )
-def get_file_content(f):
-    if len(f) > 0:
-        return f[0];
-    return None;
-def UpdateFileList(DirListState):
-    DirListState.clear();
-    DirListState.extend( os.listdir(AUDIOS_DIR) )
-def audio_list_update(d):
-    fullPath = abspath(d)
-    return fullPath
-def ZipAndDownload(files):
-    allFiles = files
-    DefaultPrefix = next(tempfile._get_candidate_names());
-    zipFile = abspath( os.path.join(ZIP_DIR, DefaultPrefix + ".zip") );
-    with zipfile.ZipFile(zipFile, 'w') as zipMe:
-        for file in allFiles:
-            print("Zipping", file);
-            zipMe.write(abspath(file), os.path.basename(file), compress_type=zipfile.ZIP_DEFLATED)
-    print("Pronto",  zipFile);
-    return '<a href="/file='+zipFile+'">If donwload dont starts, click here</a>';
-js = """
-function DetectDownloadLink(){
-    console.log('Configuring AutoDonwloadObservr...');
-    let hiddenLink = document.getElementById("DonwloadLink");
-    let onChange= function(mutations){
-         for (const mutation of mutations) {
-            if (mutation.type !== 'childList')
-                continue;
-              for (const addedNode of mutation.addedNodes) {
-                if (addedNode.nodeName === 'A') {
-                    location.href = addedNode.href;
-                }
-              }
-          }
-    }
-    let config = {  attributes: true, childList: true, subtree: true, attributeFilter: ["href"] }
-    let obs = new MutationObserver(onChange);
-    obs.observe(hiddenLink, config);
-}
-"""
-with gr.Blocks(js=js) as demo:
-    defaultSpeaker = "Dionisio Schuyler"
-    cloned_speaker_names = gr.State(list(cloned_speakers.keys()))
-    AllFileList = gr.State(list([]))
-    gr.Markdown("By using any functionality of this space, you agree to the terms of this license: https://coqui.ai/cpml")
-    with gr.Tab("TTS"):
-        with gr.Column() as row4:
-            with gr.Row() as col4:
-                speaker_type = gr.Dropdown(label="Speaker type", choices=["Studio", "Cloned"], value="Studio")
-                speaker_name_studio = gr.Dropdown(
-                    label="Studio speaker",
-                    choices=STUDIO_SPEAKERS.keys(),
-                    value=defaultSpeaker if defaultSpeaker in STUDIO_SPEAKERS.keys() else None,
-                )
-                speaker_name_custom = gr.Dropdown(
-                    label="Cloned speaker",
-                    choices=cloned_speaker_names.value,
-                    value=cloned_speaker_names.value[0] if len(cloned_speaker_names.value) != 0 else None,
-                )
-        with gr.Accordion("Advanced options", open=False):
-             with gr.Row() as rowAdvanced:
-                temperature = gr.Slider(0.00, 1.00, 0.5, step=0.05, label="Temperature", info="Choose between 0 and 1")
-                top_p = gr.Slider(0.00, 1.00, 0.8, step=0.05, label="TOP P", info="Choose between 0 and 1")
-                top_k = gr.Number(label="TOP K",value=50)
-                speed = gr.Slider(0.00, 1000.00, 1.0, step=0.1, label="Speed", info="Speed (0 to 1000)")
-        with gr.Column() as col2:
-            with gr.Row():
-                text = gr.Textbox(label="Text", info="Generate multiple audios separating lines with ---",lines=4, value="Customizado por IA Talking, o maior blog de Inteligência Artificial do Brasil!")
-                pronunc = gr.Textbox(label="Pronunciation Fix", info="Fix words pronuncation using WORD = SPEAK",lines=4)
-            with gr.Row():
-                lang = gr.Dropdown(label="Language", choices=LANUGAGES, value="pt")
-                tts_button = gr.Button(value="TTS")
-        with gr.Column() as col3:
-            # FileList = gr.FileExplorer(
-            #     glob="*.wav",
-            #     # value=["themes/utils"],
-            #     ignore_glob="**/__init__.py",
-            #     root_dir=AUDIOS_DIR,
-            #     interactive = True,
-            #     value=DirectoryList.value
-            # )
-            AudioList = gr.Dropdown(
-                    label="Generated Audios",
-                    choices=[]
-                    ,interactive=True
-                )
-            generated_audio = gr.Audio(label="Audio Play", autoplay=True)
-            AudioList.change(fn=audio_list_update, inputs=[AudioList], outputs=[generated_audio])
-            dummyHtml = gr.HTML(elem_id = "DonwloadLink", render = False);
-            downloadAll = gr.DownloadButton("Download All Files")
-            downloadAll.click(ZipAndDownload, inputs=[AllFileList], outputs=[dummyHtml]);
-            dummyHtml.render();
-    with gr.Tab("Clone a new speaker"):
-        with gr.Column() as col1:
-            upload_file = gr.Audio(label="Upload reference audio", type="filepath")
-            clone_speaker_name = gr.Textbox(label="Speaker name", value="default_speaker")
-            clone_button = gr.Button(value="Clone speaker")
-    with gr.Tab("Help"):
-         gr.Markdown("""
-Welcome to the XTTS WebUI version customized by the IA Talking blog (https://iatalk.ing).
-The main goal of this space is to share more scenarios on how XTTS can be used, as well as serve as a study resource to learn more about the TTS process and AI.
-In this version, we have some customizations that are quite useful.
-# Multiple audios
-You can generate multiple audios at once by separating the text with three dashes. For example:
-```
-Text 1
----
-Text 2, line 1
-Text 2, line 2
-```
-In the above example, 2 audio files will be generated! This is very useful when you want to generate a lot of audio but don't want to generate it all at once due to the context lost in XTTS.
-You can also specify variables that modify certain aspects.
-For example, `!speaker = Dionisio` forces the speaker to be Dionisio only for that specific audio.
-List of variables:
-- `speaker` = name of the speaker
-- `num` = file number (by default, it's the sequential number)
-- `prefix` = file name prefix
-# Pronunciation adjustment
-If you have a text that you cannot or do not want to change the content of, you can use the Pronunciation field to map words with different pronunciations.
-Simply separate them by each line. Example:
-```
-API = A,P,I
-SomeFunctionCode = Function Code
-```
-This is useful for mapping foreign words, abbreviations, acronyms, code, etc.
-         """)
-    clone_button.click(
-        fn=clone_speaker,
-        inputs=[upload_file, clone_speaker_name, cloned_speaker_names],
-        outputs=[upload_file, clone_speaker_name, cloned_speaker_names, speaker_name_custom],
-    )
-    tts_button.click(
-        fn=tts,
-        inputs=[text, pronunc,speaker_type, speaker_name_studio, speaker_name_custom, lang, temperature
-                ,speed,top_p,top_k,AllFileList
-                ],
-        outputs=[AudioList],
-    )
-if __name__ == "__main__" and DO_CHECK == "1":
-    print("Warming up server... Checking server healthy...")
-    speakerName, embs = random.choice(list(STUDIO_SPEAKERS.items()));
-    print("Testing with", speakerName);
-    ipts = xtts.TTSInputs(
-        speaker_embedding=embs["speaker_embedding"],
-        gpt_cond_latent=embs["gpt_cond_latent"],
-        text="This is a warmup request.",
-        language="en",
-        temperature=0.5,
-        speed=1.0,
-        top_k=50,
-        top_p=0.8
-    )
-    resp = xtts.predict_speech(ipts)
-    print(" TEST OK")
-if __name__ == "__main__":
-    print("STARTING...")
-    demo.launch(
-        share=False,
-        debug=False,
-        server_port=7860,
-        server_name="0.0.0.0",
-        allowed_paths=[ZIP_DIR]
-    )

+import gradio as gr
+import base64
+import tempfile
+import json
+import os
+from os.path import abspath
+import zipfile
+import random
+import xtts
+import re
+DO_CHECK = os.getenv('DO_CHECK', '1')
+OUTPUT = "./demo_outputs"
+cloned_speakers = {}
+print("Preparing file structure...")
+if not os.path.exists(OUTPUT):
+    os.mkdir(OUTPUT)
+    os.mkdir(os.path.join(OUTPUT, "cloned_speakers"))
+    os.mkdir(os.path.join(OUTPUT, "generated_audios"))
+elif os.path.exists(os.path.join(OUTPUT, "cloned_speakers")):
+    print("Loading existing cloned speakers...")
+    for file in os.listdir(os.path.join(OUTPUT, "cloned_speakers")):
+        if file.endswith(".json"):
+            with open(os.path.join(OUTPUT, "cloned_speakers", file), "r") as fp:
+                cloned_speakers[file[:-5]] = json.load(fp)
+    print("Available cloned speakers:", ", ".join(cloned_speakers.keys()))
+AUDIOS_DIR = os.path.join("demo_outputs", "generated_audios");
+ZIP_DIR = os.path.join("zip_outputs");
+print("Checking zip at", ZIP_DIR)
+if not os.path.exists(ZIP_DIR):
+    os.mkdir(ZIP_DIR)
+try:
+    print("Getting metadata from server ...")
+    LANUGAGES = xtts.get_languages()
+    print("Available languages:", ", ".join(LANUGAGES))
+    STUDIO_SPEAKERS = xtts.get_speakers()
+    print("Available studio speakers:", ", ".join(STUDIO_SPEAKERS.keys()))
+except:
+    raise Exception("Please make sure the server is running first.")
+def ExtractVars(input_string):
+    # Split the string into lines
+    lines = input_string.split('\n')
+    # Initialize an empty dictionary to store key-value pairs
+    result_dict = {
+         'prefix': None,
+         'name': '',
+         'speaker': None,
+         'num': None,
+    }
+    # List to hold lines that do not start with '!'
+    filtered_lines = []
+    # Iterate through each line
+    for line in lines:
+        # Check if the line starts with '!'
+        if line.strip().startswith('!'):
+            # Try to split the line into key and value parts
+            try:
+                # Split on '=' and strip whitespace from key and value
+                key, value = line.strip()[1:].split('=')
+                key = key.strip()
+                value = value.strip()
+                # Add to dictionary
+                result_dict[key] = value
+            except ValueError:
+                # Handle the case where there is no '=' or improper format
+                continue
+        elif len(line.strip()) > 0:
+            # Add the line to filtered_lines if it doesn't start with '!'
+            filtered_lines.append(line)
+    # Join the filtered lines back into a single string
+    filtered_string = '\n'.join(filtered_lines)
+    return result_dict, filtered_string
+def ParsePronucs(PronuncStr):
+    # Split the string into lines
+    lines = PronuncStr.split('\n')
+    # Initialize an empty dictionary to store key-value pairs
+    PronuncWords =  []
+    # Iterate through each line
+    for line in lines:
+        if len(line.strip()) > 0:
+            word,*text = line.strip().split('=',1)
+            word = word.strip()
+            text,*opts = text[0].split("|",1);
+            text = text.strip();
+            if len(opts) > 0:
+                opts = opts[0].strip().split(",");
+            else:
+                opts = [];
+            PronuncWords.append({'word':word, 'text':text, 'opts':opts})
+    return PronuncWords
+def FindSpeakerByName(name, speakerType):
+    srcItems = STUDIO_SPEAKERS if speakerType == "Studio" else cloned_speakers;
+    for key, value in srcItems.items():
+        if key == name:
+            return key,value
+        if key.split(" ")[0] == name:
+            return key,value;
+def clone_speaker(upload_file, clone_speaker_name, cloned_speaker_names):
+    embeddings =  xtts.predict_speaker(upload_file)
+    with open(os.path.join(OUTPUT, "cloned_speakers", clone_speaker_name + ".json"), "w") as fp:
+        json.dump(embeddings, fp)
+    cloned_speakers[clone_speaker_name] = embeddings
+    cloned_speaker_names.append(clone_speaker_name)
+    return upload_file, clone_speaker_name, cloned_speaker_names, gr.Dropdown(choices=cloned_speaker_names)
+def tts(text, pronunc, speaker_type, speaker_name_studio, speaker_name_custom, lang, temperature
+    ,speed,top_p,top_k, AllFileList,progress=gr.Progress()
+):
+    embeddings = STUDIO_SPEAKERS[speaker_name_studio] if speaker_type == 'Studio' else cloned_speakers[speaker_name_custom]
+    # break at line!
+    lines = text.split("---");
+    totalLines = len(lines);
+    print("Total parts:", len(lines))
+    audioNum = 0;
+    DefaultPrefix = next(tempfile._get_candidate_names());
+    CurrentPrefix = DefaultPrefix
+    # break pronuc
+    Pronuncs = ParsePronucs(pronunc)
+    AudioList = [];
+    for line in progress.tqdm(lines, desc="Gerando fala..."):
+        audioNum += 1;
+        textVars,cleanLine = ExtractVars(line)
+        if textVars['prefix']:
+            CurrentPrefix = textVars['prefix']
+        audioName = textVars['name'];
+        if audioName:
+            audioName = '_'+audioName
+        num = textVars['num'];
+        if not num:
+            num = audioNum;
+        path = CurrentPrefix +"_n_" + str(num)+audioName+".wav"
+        print("Generating audio for line", num, 'sequence', audioNum);
+        speaker = textVars['speaker'];
+        if not speaker:
+            speaker = speaker_name_studio if speaker_type == 'Studio' else speaker_name_custom
+        speakerName,embeddings = FindSpeakerByName(speaker, speaker_type)
+        if not speakerName:
+             raise ValueError("InvalidSpeaker: "+speakerName)
+        FixedText = cleanLine;
+        for pronunc in Pronuncs:
+            word = pronunc['word']
+            text = pronunc['text']
+            opts = pronunc['opts'];
+            flg = re.IGNORECASE
+            if 'cs' in opts:
+                flg = 0;
+            FixedText = re.sub(f'\\b{word}\\b', text, FixedText, flags=flg)
+        ipts = xtts.TTSInputs(
+            speaker_embedding=embeddings["speaker_embedding"],
+            gpt_cond_latent=embeddings["gpt_cond_latent"],
+            text=FixedText,
+            language=lang,
+            temperature=temperature,
+            speed=speed,
+            top_k=top_k,
+            top_p=top_p
+        )
+        generated_audio = xtts.predict_speech(ipts)
+        print("Audio generated.. Saving to", path);
+        generated_audio_path = os.path.join(AUDIOS_DIR, path)
+        with open(generated_audio_path, "wb") as fp:
+            fp.write(base64.b64decode(generated_audio))
+            AudioList.append(fp.name);
+    AllFileList.clear();
+    AllFileList.extend(AudioList);
+    return gr.Dropdown(
+            label="Generated Audios",
+            choices=list(AudioList),
+            value=AudioList[0]
+        )
+def get_file_content(f):
+    if len(f) > 0:
+        return f[0];
+    return None;
+def UpdateFileList(DirListState):
+    DirListState.clear();
+    DirListState.extend( os.listdir(AUDIOS_DIR) )
+def audio_list_update(d):
+    fullPath = abspath(d)
+    return fullPath
+def ZipAndDownload(files):
+    allFiles = files
+    DefaultPrefix = next(tempfile._get_candidate_names());
+    zipFile = abspath( os.path.join(ZIP_DIR, DefaultPrefix + ".zip") );
+    with zipfile.ZipFile(zipFile, 'w') as zipMe:
+        for file in allFiles:
+            print("Zipping", file);
+            zipMe.write(abspath(file), os.path.basename(file), compress_type=zipfile.ZIP_DEFLATED)
+    print("Pronto",  zipFile);
+    return '<a href="/file='+zipFile+'">If donwload dont starts, click here</a>';
+js = """
+function DetectDownloadLink(){
+    console.log('Configuring AutoDonwloadObservr...');
+    let hiddenLink = document.getElementById("DonwloadLink");
+    let onChange= function(mutations){
+         for (const mutation of mutations) {
+            if (mutation.type !== 'childList')
+                continue;
+              for (const addedNode of mutation.addedNodes) {
+                if (addedNode.nodeName === 'A') {
+                    location.href = addedNode.href;
+                }
+              }
+          }
+    }
+    let config = {  attributes: true, childList: true, subtree: true, attributeFilter: ["href"] }
+    let obs = new MutationObserver(onChange);
+    obs.observe(hiddenLink, config);
+}
+"""
+with gr.Blocks(js=js) as demo:
+    defaultSpeaker = "Dionisio Schuyler"
+    cloned_speaker_names = gr.State(list(cloned_speakers.keys()))
+    AllFileList = gr.State(list([]))
+    gr.Markdown("By using any functionality of this space, you agree to the terms of this license: https://coqui.ai/cpml")
+    with gr.Tab("TTS"):
+        with gr.Column() as row4:
+            with gr.Row() as col4:
+                speaker_type = gr.Dropdown(label="Speaker type", choices=["Studio", "Cloned"], value="Studio")
+                speaker_name_studio = gr.Dropdown(
+                    label="Studio speaker",
+                    choices=STUDIO_SPEAKERS.keys(),
+                    value=defaultSpeaker if defaultSpeaker in STUDIO_SPEAKERS.keys() else None,
+                )
+                speaker_name_custom = gr.Dropdown(
+                    label="Cloned speaker",
+                    choices=cloned_speaker_names.value,
+                    value=cloned_speaker_names.value[0] if len(cloned_speaker_names.value) != 0 else None,
+                )
+        with gr.Accordion("Advanced options", open=False):
+             with gr.Row() as rowAdvanced:
+                temperature = gr.Slider(0.00, 1.00, 0.5, step=0.05, label="Temperature", info="Choose between 0 and 1")
+                top_p = gr.Slider(0.00, 1.00, 0.8, step=0.05, label="TOP P", info="Choose between 0 and 1")
+                top_k = gr.Number(label="TOP K",value=50)
+                speed = gr.Slider(0.00, 1000.00, 1.0, step=0.1, label="Speed", info="Speed (0 to 1000)")
+        with gr.Column() as col2:
+            with gr.Row():
+                text = gr.Textbox(label="Text", info="Generate multiple audios separating lines with ---",lines=4, value="Customizado por IA Talking, o maior blog de Inteligência Artificial do Brasil!")
+                pronunc = gr.Textbox(label="Pronunciation Fix", info="Fix words pronuncation using WORD = SPEAK",lines=4)
+            with gr.Row():
+                lang = gr.Dropdown(label="Language", choices=LANUGAGES, value="pt")
+                tts_button = gr.Button(value="TTS")
+        with gr.Column() as col3:
+            # FileList = gr.FileExplorer(
+            #     glob="*.wav",
+            #     # value=["themes/utils"],
+            #     ignore_glob="**/__init__.py",
+            #     root_dir=AUDIOS_DIR,
+            #     interactive = True,
+            #     value=DirectoryList.value
+            # )
+            AudioList = gr.Dropdown(
+                    label="Generated Audios",
+                    choices=[]
+                    ,interactive=True
+                )
+            generated_audio = gr.Audio(label="Audio Play", autoplay=True)
+            AudioList.change(fn=audio_list_update, inputs=[AudioList], outputs=[generated_audio])
+            dummyHtml = gr.HTML(elem_id = "DonwloadLink", render = False);
+            downloadAll = gr.DownloadButton("Download All Files")
+            downloadAll.click(ZipAndDownload, inputs=[AllFileList], outputs=[dummyHtml]);
+            dummyHtml.render();
+    with gr.Tab("Clone a new speaker"):
+        with gr.Column() as col1:
+            upload_file = gr.Audio(label="Upload reference audio", type="filepath")
+            clone_speaker_name = gr.Textbox(label="Speaker name", value="default_speaker")
+            clone_button = gr.Button(value="Clone speaker")
+    with gr.Tab("Help"):
+         gr.Markdown("""
+Welcome to the XTTS WebUI version customized by the IA Talking blog (https://iatalk.ing).
+The main goal of this space is to share more scenarios on how XTTS can be used, as well as serve as a study resource to learn more about the TTS process and AI.
+In this version, we have some customizations that are quite useful.
+# Multiple audios
+You can generate multiple audios at once by separating the text with three dashes. For example:
+```
+Text 1
+---
+Text 2, line 1
+Text 2, line 2
+```
+In the above example, 2 audio files will be generated! This is very useful when you want to generate a lot of audio but don't want to generate it all at once due to the context lost in XTTS.
+You can also specify variables that modify certain aspects.
+For example, `!speaker = Dionisio` forces the speaker to be Dionisio only for that specific audio.
+List of variables:
+- `speaker` = name of the speaker
+- `num` = file number (by default, it's the sequential number)
+- `prefix` = file name prefix
+# Pronunciation adjustment
+If you have a text that you cannot or do not want to change the content of, you can use the Pronunciation field to map words with different pronunciations.
+Simply separate them by each line. Example:
+```
+API = A,P,I
+SomeFunctionCode = Function Code
+```
+This is useful for mapping foreign words, abbreviations, acronyms, code, etc.
+         """)
+    clone_button.click(
+        fn=clone_speaker,
+        inputs=[upload_file, clone_speaker_name, cloned_speaker_names],
+        outputs=[upload_file, clone_speaker_name, cloned_speaker_names, speaker_name_custom],
+    )
+    tts_button.click(
+        fn=tts,
+        inputs=[text, pronunc,speaker_type, speaker_name_studio, speaker_name_custom, lang, temperature
+                ,speed,top_p,top_k,AllFileList
+                ],
+        outputs=[AudioList],
+    )
+if __name__ == "__main__" and DO_CHECK == "1":
+    print("Warming up server... Checking server healthy...")
+    speakerName, embs = random.choice(list(STUDIO_SPEAKERS.items()));
+    print("Testing with", speakerName);
+    ipts = xtts.TTSInputs(
+        speaker_embedding=embs["speaker_embedding"],
+        gpt_cond_latent=embs["gpt_cond_latent"],
+        text="This is a warmup request.",
+        language="en",
+        temperature=0.5,
+        speed=1.0,
+        top_k=50,
+        top_p=0.8
+    )
+    resp = xtts.predict_speech(ipts)
+    print(" TEST OK")
+if __name__ == "__main__":
+    print("STARTING...")
+    demo.launch(
+        share=False,
+        debug=False,
+        server_port=7860,
+        server_name="0.0.0.0",
+        allowed_paths=[ZIP_DIR],
+        ssr_mode=False
+    )