Hugging Face Space metadata (page scrape): Space status "Runtime error"; file size 2,838 bytes; recent commits include 89d7ff4, 3918537, f04e297, ba2b8a5, 4b50ebb, 1381480, f1a443d, 59b84ef.
import gradio as gr
import librosa
import numpy as np
import torch
import random
from diffusers import SpectrogramDiffusionPipeline, MidiProcessor
# Load the pretrained MIDI-to-spectrogram diffusion pipeline in half precision
# and move it to the GPU. NOTE(review): this runs at import time, downloads the
# model weights on first run, and hard-requires a CUDA device — it will raise on
# CPU-only hosts (a likely cause of the Space's "Runtime error" status).
pipe = SpectrogramDiffusionPipeline.from_pretrained(
"google/music-spectrogram-diffusion", torch_dtype=torch.float16
).to("cuda")
# Use xformers memory-efficient attention to lower VRAM usage during inference.
pipe.enable_xformers_memory_efficient_attention()
# Converts an uploaded .mid file into the token sequences the pipeline consumes.
processor = MidiProcessor()
# Primary hex colors used to tint the rendered waveform bars.
_RED, _GREEN, _BLUE = "#ff0000", "#00ff00", "#0000ff"

# Each entry is a [start, end] gradient pair; predict() picks one at random.
COLORS = [[_RED, _GREEN], [_GREEN, _BLUE], [_BLUE, _RED]]
def predict(audio_file_pth):
    """Synthesize audio from an uploaded MIDI file and return a waveform video.

    Only the first 5 processed note segments are rendered, keeping inference
    time bounded. Assumes `audio_file_pth` is a file-like object exposing a
    `.name` path (as gr.File provides).
    """
    with torch.inference_mode():
        # Tokenize the MIDI and truncate to 5 segments before diffusion.
        midi_tokens = processor(audio_file_pth.name)[:5]
        result = pipe(midi_tokens)
    waveform = result.audios[0]
    bar_colors = random.choice(COLORS)
    # 16000 Hz is the pipeline's output sample rate — TODO confirm against model card.
    return gr.make_waveform(
        (16000, waveform.ravel()), bars_color=bar_colors, bar_count=75
    )
# Demo title shown at the top of the Gradio interface.
title = "Music Spectrogram Diffusion: Multi-instrument Music Synthesis with Spectrogram Diffusion"
# Short summary of the paper, rendered below the title.
description = """
In this work, the authors focus on a middle ground of neural synthesizers that can generate audio from MIDI sequences with arbitrary combinations of instruments in realtime.
This enables training on a wide range of transcription datasets with a single model, which in turn offers note-level control of composition and instrumentation across a wide range of instruments.
They use a simple two-stage process: MIDI to spectrograms with an encoder-decoder Transformer, then spectrograms to audio with a generative adversarial network (GAN) spectrogram inverter.
"""
# Example MIDI files offered in the interface (paths relative to the Space repo).
examples = ["examples/beethoven_mond_2.mid", "examples/beethoven_hammerklavier_2.mid"]
# NOTE(review): this gr.HTML component is instantiated at module level, outside
# any Blocks/Interface context, so it is presumably never rendered — verify
# whether it was meant to be part of `description` or placed inside a Blocks.
gr.HTML("""
<p>For faster inference without waiting in the queue, you should duplicate this space and upgrade to GPU via the settings.
<br/>
<a href="https://huggingface.co/spaces/reach-vb/music-spectrogram-diffusion?duplicate=true">
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
</p>""")
# Footer HTML with paper/code links and the BibTeX citation, rendered below the demo.
article = """
<div style='margin:20px auto;'>
<p>References: <a href="https://arxiv.org/abs/2206.05408">Music Spectrogram Diffusion paper</a> |
<a href="https://github.com/magenta/music-spectrogram-diffusion">original GitHub</a> |
<a href="https://github.com/magenta/music-spectrogram-diffusion#pretrained-models">original weights</a></p>
<pre>
@article{hawthorne2022multi,
title={Multi-instrument music synthesis with spectrogram diffusion},
author={Hawthorne, Curtis and Simon, Ian and Roberts, Adam and Zeghidour, Neil and Gardner, Josh and Manilow, Ethan and Engel, Jesse},
journal={arXiv preprint arXiv:2206.05408},
year={2022}
}
</pre>
</div>
"""
# Wire the demo together: one MIDI file in, one waveform video out, and launch
# the app. debug=True keeps the process attached and prints errors to the log.
gr.Interface(
fn=predict,
inputs=[
# Single .mid upload; predict() reads its .name path.
gr.File(label="Upload MIDI", file_count="single", file_types=[".mid"]),
],
outputs=[
# gr.make_waveform in predict() returns a video file path.
gr.Video(label="Synthesised Music"),
],
title=title,
description=description,
theme="gradio/monochrome",
examples=examples,
article=article,
).launch(debug=True)
|