import random

import gradio as gr
import torch
from diffusers import MidiProcessor, SpectrogramDiffusionPipeline

# Load the two-stage pipeline (note-token encoder-decoder plus spectrogram
# inverter) in half precision and move it to the GPU.
pipe = SpectrogramDiffusionPipeline.from_pretrained(
    "google/music-spectrogram-diffusion", torch_dtype=torch.float16
).to("cuda")
pipe.enable_xformers_memory_efficient_attention()

# Converts a MIDI file into a list of tokenised note segments for the pipeline.
processor = MidiProcessor()
# Bar-colour gradients for the rendered waveform video.
COLORS = [
    ["#ff0000", "#00ff00"],
    ["#00ff00", "#0000ff"],
    ["#0000ff", "#ff0000"],
]
def predict(audio_file_pth):
    with torch.inference_mode():
        # Tokenise the uploaded MIDI; synthesise only the first five segments
        # to keep inference time reasonable.
        output = pipe(processor(audio_file_pth.name)[:5])
    audio = output.audios[0]
    # The pipeline outputs 16 kHz audio; render it as a waveform video.
    return gr.make_waveform((16000, audio.ravel()), bars_color=random.choice(COLORS), bar_count=75)
title = "Music Spectrogram Diffusion: Multi-instrument Music Synthesis with Spectrogram Diffusion"
description = """
In this work, the authors focus on a middle ground of neural synthesizers that can generate audio from MIDI sequences with arbitrary combinations of instruments in real time.
This enables training on a wide range of transcription datasets with a single model, which in turn offers note-level control of composition and instrumentation across a wide range of instruments.
They use a simple two-stage process: MIDI to spectrograms with an encoder-decoder Transformer, then spectrograms to audio with a generative adversarial network (GAN) spectrogram inverter.
<p>For faster inference without waiting in the queue, you can duplicate this Space and upgrade to a GPU in its settings.
<br/>
<a href="https://huggingface.co/spaces/reach-vb/music-spectrogram-diffusion?duplicate=true">
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
</p>
"""
examples = ["examples/beethoven_mond_2.mid", "examples/beethoven_hammerklavier_2.mid"]
article = """
<div style='margin:20px auto;'>
<p>References: <a href="https://arxiv.org/abs/2206.05408">Music Spectrogram Diffusion paper</a> |
<a href="https://github.com/magenta/music-spectrogram-diffusion">original GitHub</a> |
<a href="https://github.com/magenta/music-spectrogram-diffusion#pretrained-models">original weights</a></p>
<pre>
@article{hawthorne2022multi,
  title={Multi-instrument music synthesis with spectrogram diffusion},
  author={Hawthorne, Curtis and Simon, Ian and Roberts, Adam and Zeghidour, Neil and Gardner, Josh and Manilow, Ethan and Engel, Jesse},
  journal={arXiv preprint arXiv:2206.05408},
  year={2022}
}
</pre>
</div>
"""
gr.Interface(
    fn=predict,
    inputs=[
        gr.File(label="Upload MIDI", file_count="single", file_types=[".mid"]),
    ],
    outputs=[
        gr.Video(label="Synthesised Music"),
    ],
    title=title,
    description=description,
    theme="gradio/monochrome",
    examples=examples,
    article=article,
).launch(debug=True)
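
# To keep the raw audio rather than the waveform video, a sketch (assumes
# scipy is available; the pipeline output is 16 kHz mono):
#
#     import scipy.io.wavfile
#     tokens = processor("examples/beethoven_mond_2.mid")
#     scipy.io.wavfile.write("out.wav", 16000, pipe(tokens[:5]).audios[0].ravel())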