Spaces:
Sleeping
Sleeping
import gradio as gr | |
import os | |
import bark | |
from bark import generate_audio, preload_models, SAMPLE_RATE | |
import time | |
import scipy | |
import noisereduce as nr | |
import bark | |
from transformers import BertTokenizer | |
########################
##### Voice cloning functionality

# Force CPU-only inference by hiding every CUDA device.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Request the small Bark checkpoints.
os.environ["SUNO_USE_SMALL_MODELS"] = "1"
# bark has already been imported by the time this line runs, and upstream
# Bark evaluates SUNO_USE_SMALL_MODELS once, while bark.generation is being
# imported, so the environment variable alone comes too late to be picked up.
# Set the module-level flag directly as well, the same way CACHE_DIR is
# overridden below.
# NOTE(review): assumes this bark build exposes generation.USE_SMALL_MODELS
# like upstream does - confirm against the installed package.
bark.generation.USE_SMALL_MODELS = True

# Fetch the BERT tokenizer up front; the returned object is intentionally
# discarded, the call is made only for its download/caching side effect.
BertTokenizer.from_pretrained("bert-base-multilingual-cased")

# Point Bark at a local model directory so the models are not re-downloaded
# when they are loaded.
bark.generation.CACHE_DIR = "bark_models"
def generate_cloned_voice_audio(text_prompt):
    """Read ``text_prompt`` aloud with the cloned voice.

    The raw waveform and a noise-reduced copy are both written to WAV files
    in the current working directory. Only the raw waveform is returned; the
    noise-reduced version is saved to disk but not handed back to the caller.

    Args:
        text_prompt: The text to synthesize.

    Returns:
        A ``(SAMPLE_RATE, audio_array)`` tuple, where ``audio_array`` is the
        unmodified output of ``generate_audio``.
    """
    # A bare ``import scipy`` does not guarantee that the ``scipy.io.wavfile``
    # submodule has been loaded, so import it explicitly instead of relying
    # on another package having pulled it in already.
    from scipy.io import wavfile

    print("=" * 10)
    print("NOW READING:")
    print(text_prompt)
    print("=" * 10)

    # Voice prompt file that carries the cloned voice.
    history_prompt = "pm_voice.npz"

    # Time the generation so the log shows how long synthesis took.
    t0 = time.time()
    audio_array = generate_audio(
        text_prompt,
        history_prompt=history_prompt,
    )
    generation_duration_s = time.time() - t0

    # Number of samples divided by the sample rate gives the clip length.
    audio_duration_s = audio_array.shape[0] / SAMPLE_RATE
    print(f"took {generation_duration_s:.0f}s to generate {audio_duration_s:.0f}s of audio")

    # Produce a noise-reduced copy alongside the raw waveform.
    reduced_noise_audio_array = nr.reduce_noise(y=audio_array, sr=SAMPLE_RATE)

    # Persist both versions; each call overwrites the previous files.
    audio_output_path = "output_audio.wav"
    noisereduced_audio_output_path = "output_noisereduced_audio.wav"
    wavfile.write(audio_output_path, rate=SAMPLE_RATE, data=audio_array)
    wavfile.write(
        noisereduced_audio_output_path,
        rate=SAMPLE_RATE,
        data=reduced_noise_audio_array,
    )

    # The raw audio (not the noise-reduced copy) is what gets returned.
    return (SAMPLE_RATE, audio_array)
######################## | |
def greet(name):
    """Return a greeting for ``name`` followed by a voice-file status note.

    The status note says whether ``pm_voice.npz`` exists as a regular file
    in the current working directory.
    """
    status_by_presence = {
        True: "Found the voice file",
        False: "Voice file not found",
    }
    status = status_by_presence[os.path.isfile("pm_voice.npz")]
    greeting = "Hello " + name
    return greeting + "!!" + status
# Audio widget that presents the synthesized speech.
output_audio = gr.Audio(label="My cloned voice reading your text")

# One text box in, one audio clip out, backed by the voice-cloning function.
iface = gr.Interface(
    fn=generate_cloned_voice_audio,
    inputs="text",
    outputs=output_audio,
)

# Start the app as soon as the module is executed, with sharing enabled.
iface.launch(share=True)