# Source: Hugging Face Space file "app.py" (uploaded by fffiloni)
# Last commit: "Update app.py" (e236784)
# File size: 2.08 kB
import gradio as gr
import os
import shutil
from huggingface_hub import snapshot_download
import numpy as np
from scipy.io import wavfile
# Hugging Face Hub repositories whose snapshots must be on disk before the
# model below can be loaded.
model_ids = [
    'suno/bark',
]

# Store each snapshot under checkpoints/<name>, where <name> is the part of the
# repository id after the last '/'.
for model_id in model_ids:
    model_name = model_id.rpartition('/')[2]
    snapshot_download(
        model_id,
        local_dir=f'checkpoints/{model_name}',
    )

from TTS.tts.configs.bark_config import BarkConfig
from TTS.tts.models.bark import Bark

# Build Bark from its default configuration, then load the weights that were
# just downloaded for 'suno/bark'. `config` and `model` are module-level
# because the request handler reads them.
config = BarkConfig()
model = Bark.init_from_config(config)
model.load_checkpoint(
    config,
    checkpoint_dir="checkpoints/bark",
    eval=True,
)
def infer(prompt, input_wav_file):
    """Synthesize ``prompt`` with Bark, cloning the voice of an uploaded recording.

    The reference recording is copied to
    ``bark_voices/<speaker_id>/<speaker_id>.wav`` and ``<speaker_id>`` is
    handed to the model together with the ``bark_voices/`` voice directory.
    The speaker id is the upload's file name (without extension) plus a short
    hash of the file's contents, so two different recordings that share a
    file name never share a voice directory.

    Args:
        prompt: Text to speak.
        input_wav_file: Filesystem path of the reference recording (the Gradio
            audio input is declared with ``type="filepath"``).

    Returns:
        Path of a newly written WAV file containing the synthesized speech.

    Raises:
        gr.Error: If the prompt is empty or no reference recording is available.
    """
    # Function-scope imports keep this block self-contained.
    import hashlib
    import tempfile

    # Fail early with a message the UI can show instead of a TypeError from
    # os.path.basename(None) or a synthesis call with nothing to say.
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a text prompt.")
    if not input_wav_file or not os.path.isfile(input_wav_file):
        raise gr.Error("Please upload a WAV file with the voice to clone.")

    voices_root = "bark_voices"

    # Hash the recording so the speaker id depends on its contents, not only on
    # a file name that different uploads may share.
    digest = hashlib.sha256()
    with open(input_wav_file, "rb") as reference:
        for chunk in iter(lambda: reference.read(1 << 20), b""):
            digest.update(chunk)
    stem = os.path.splitext(os.path.basename(input_wav_file))[0]
    speaker_id = f"{stem}-{digest.hexdigest()[:12]}"

    # Layout used by the synthesize call below:
    # bark_voices/<speaker_id>/<speaker_id>.wav
    speaker_dir = os.path.join(voices_root, speaker_id)
    os.makedirs(speaker_dir, exist_ok=True)
    reference_path = os.path.join(speaker_dir, f"{speaker_id}.wav")
    if not os.path.exists(reference_path):
        # Copy rather than move: the upload stays where Gradio put it, so the
        # same input can be submitted again.
        shutil.copyfile(input_wav_file, reference_path)

    output_dict = model.synthesize(
        prompt,
        config,
        speaker_id=speaker_id,
        voice_dirs=f"{voices_root}/",
    )

    # NOTE(review): 24 kHz is the rate the original code hard-coded for Bark's
    # output; confirm against the model configuration if that ever changes.
    sample_rate = 24000

    # Write to a unique file so concurrent requests cannot overwrite each
    # other's result. The file is not deleted here because the caller still
    # has to read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_file:
        output_path = out_file.name
    wavfile.write(output_path, sample_rate, output_dict['wav'])
    return output_path
# Web UI: a text prompt and an uploaded reference recording go in, one audio
# clip comes out. The audio input hands `infer` a file path rather than raw
# samples (type="filepath").
prompt_input = gr.Textbox(label="Text to speech prompt")
voice_input = gr.Audio(
    label="WAV voice to clone",
    type="filepath",
    source="upload",
)
demo = gr.Interface(
    fn=infer,
    inputs=[prompt_input, voice_input],
    outputs=[gr.Audio()],
    title="Instant Voice Cloning",
)
demo.launch()