# Hugging Face Spaces app: mixes a vocal track with a background track via a Gradio UI.
import os
import tempfile

import gradio as gr
import librosa
import soundfile as sf
def inference(audio_1, audio_2):
    """Mix a vocal track with a background track and return the mixed file path.

    Parameters
    ----------
    audio_1 : str
        Filesystem path to the vocal (translated) audio file.
    audio_2 : str
        Filesystem path to the background audio file.

    Returns
    -------
    str
        Path to the mixed, peak-normalized WAV file.
    """
    # Load both files at their native sample rates (sr=None keeps originals).
    translated_audio, sr_tr = librosa.load(audio_1, sr=None)
    background_audio, sr_bg = librosa.load(audio_2, sr=None)

    # Resample the vocals to the background's rate if they differ.
    # NOTE: librosa >= 0.10 requires keyword arguments here; the old
    # positional form librosa.resample(y, sr_tr, sr_bg) raises TypeError.
    if sr_bg != sr_tr:
        translated_audio = librosa.resample(
            translated_audio, orig_sr=sr_tr, target_sr=sr_bg
        )
    sr = sr_bg

    # Zero-pad the shorter signal so both have identical length.
    # `size` is keyword-only in librosa >= 0.10.
    max_len = max(len(background_audio), len(translated_audio))
    background_audio = librosa.util.fix_length(background_audio, size=max_len)
    translated_audio = librosa.util.fix_length(translated_audio, size=max_len)

    # Sum the tracks, then peak-normalize to prevent clipping.
    full_audio = librosa.util.normalize(background_audio + translated_audio)

    # Write to a unique temp file instead of a fixed "test.wav" so
    # overlapping requests cannot clobber each other's output.
    out_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
    sf.write(out_path, full_audio, sr)
    return out_path
title = "음성 합성"

# Gradio UI: two audio-file inputs (vocals + background) -> one mixed file out.
demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.Audio(type="filepath", label="Vocals"),
        gr.Audio(type="filepath", label="배경음"),
    ],
    outputs=gr.Audio(type="filepath", label="합성 결과"),
    title=title,
)

# Process one request at a time; launch with debug logging enabled.
demo.queue(max_size=1)
demo.launch(debug=True)