import gradio as gr
import torch
from diffusers import MidiProcessor, SpectrogramDiffusionPipeline

# Load the pretrained MIDI-to-audio diffusion pipeline and its MIDI tokenizer.
pipe = SpectrogramDiffusionPipeline.from_pretrained("google/music-spectrogram-diffusion")
pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")
processor = MidiProcessor()

def predict(midi_file):
    # Tokenize the uploaded MIDI file into note segments; only the first two
    # segments are synthesised to keep inference time manageable.
    output = pipe(processor(midi_file.name)[:2])
    audio = output.audios[0]
    # Gradio's numpy audio format is a (sample_rate, waveform) tuple; the model
    # generates audio at 16 kHz.
    return (16000, audio.ravel())
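
# For reference, a minimal offline sketch of the same synthesis without the
# Gradio UI (not executed by this app). "example.mid" is a hypothetical input
# path and scipy is assumed to be installed:
#
#   from scipy.io import wavfile
#   result = pipe(processor("example.mid")[:2])
#   wavfile.write("synth.wav", 16000, result.audios[0].ravel())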

title = "Music Spectrogram Diffusion: Multi-instrument Music Synthesis with Spectrogram Diffusion"
description = """
In this work, the authors focus on a middle ground of neural synthesizers that can generate audio in real time from MIDI sequences with arbitrary combinations of instruments.
This enables training a single model on a wide range of transcription datasets, which in turn offers note-level control of composition and instrumentation across many different instruments.
They use a simple two-stage process: MIDI to spectrograms with an encoder-decoder Transformer, then spectrograms to audio with a generative adversarial network (GAN) spectrogram inverter.
"""

gr.Interface(
    fn=predict,
    inputs=[
        gr.File(file_count="single", file_types=[".mid"]),
    ],
    outputs=[
        gr.Audio(label="Synthesised Music", type="numpy"),
    ],
    title=title,
    description=description,
    theme='gstaff/xkcd',
).launch(debug=True)