# LuisVSpeaks / app.py
# Author: LuisAVasquez
# NOTE: the lines below were Hugging Face web-viewer metadata captured with the
# file ("Update app.py", commit e19fa2d verified, raw / history / blame, 2.1 kB);
# they are kept here as comments so the module remains valid Python.
import gradio as gr
import os
import bark
from bark import generate_audio, preload_models, SAMPLE_RATE
import time
import scipy
import noisereduce as nr
import bark
from transformers import BertTokenizer
########################
##### Voice cloning functionality
# make sure to only use CPU
os.environ["CUDA_VISIBLE_DEVICES"] = ""
os.environ["SUNO_USE_SMALL_MODELS"] = "1"
# make sure to download BERT tokenizer
BertTokenizer.from_pretrained("bert-base-multilingual-cased")
# Do not re-download the models when loading them
bark.generation.CACHE_DIR = "bark_models"
def generate_cloned_voice_audio(text_prompt):
print("="*10)
print("NOW READING:")
print(text_prompt)
print("="*10)
# load voice file
history_prompt = "pm_voice.npz"
# keep track of duration
t0 = time.time()
# generate cloned voice audio
audio_array = generate_audio(
text_prompt,
history_prompt = history_prompt
)
# keep track of duration
generation_duration_s = time.time() - t0
audio_duration_s = audio_array.shape[0] / SAMPLE_RATE
print(f"took {generation_duration_s:.0f}s to generate {audio_duration_s:.0f}s of audio")
# reduce noise
reduced_noise_audio_array = nr.reduce_noise(y=audio_array, sr=SAMPLE_RATE)
# write to file
audio_output_path = "output_audio.wav"
noisereduced_audio_output_path = "output_noisereduced_audio.wav"
scipy.io.wavfile.write(audio_output_path, rate=SAMPLE_RATE, data=audio_array)
scipy.io.wavfile.write(noisereduced_audio_output_path, rate=SAMPLE_RATE, data=reduced_noise_audio_array)
return (SAMPLE_RATE, audio_array)
########################
def greet(name):
if os.path.isfile("pm_voice.npz"):
preffix = "Found the voice file"
else:
preffix = "Voice file not found"
return "Hello " + name + "!!" + preffix
output_audio = gr.Audio(
# format = "ogg",
label = "My cloned voice reading your text",
)
iface = gr.Interface(
fn=generate_cloned_voice_audio,
inputs="text",
outputs=output_audio
)
iface.launch(share=True)