import gradio as gr
import torch
from diffusers import MidiProcessor, SpectrogramDiffusionPipeline

# Load the pretrained MIDI-to-audio diffusion pipeline and the MIDI processor
# that tokenizes .mid files into note-event sequences. Fall back to CPU when
# no GPU is available so the app can still start.
pipe = SpectrogramDiffusionPipeline.from_pretrained("google/music-spectrogram-diffusion")
pipe = pipe.to("cuda" if torch.cuda.is_available() else "cpu")
processor = MidiProcessor()
def predict(audio_file_pth):
    # Gradio passes the uploaded .mid file as a tempfile-like object; .name is its path.
    # Tokenize the MIDI and synthesize only the first two token segments to keep
    # inference time short.
    output = pipe(processor(audio_file_pth.name)[:2])
    audio = output.audios[0]
    # gr.Audio(type="numpy") expects a (sample_rate, waveform) tuple; the model
    # renders audio at 16 kHz.
    return (16000, audio.ravel())
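
# Quick local check (a sketch, not part of the original app): mimic the
# tempfile-like object Gradio hands to predict. "example.mid" is a
# hypothetical path; substitute any MIDI file on disk.
#
#   class _Upload:
#       name = "example.mid"
#
#   sr, waveform = predict(_Upload())  # -> (16000, 1-D float numpy array)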
title = "Music Spectrogram Diffusion: Multi-instrument Music Synthesis with Spectrogram Diffusion"
description = """
In this work, the authors focus on a middle ground of neural synthesizers that can generate audio from MIDI sequences with arbitrary combinations of instruments in real time.
This enables training on a wide range of transcription datasets with a single model, which in turn offers note-level control of composition and instrumentation across a wide range of instruments.
They use a simple two-stage process: MIDI to spectrograms with an encoder-decoder Transformer, then spectrograms to audio with a generative adversarial network (GAN) spectrogram inverter.
"""
examples = []  # placeholder for example .mid files (not wired into the Interface below)
gr.Interface(
    fn=predict,
    inputs=[
        gr.File(file_count="single", file_types=[".mid"]),
    ],
    outputs=[
        gr.Audio(label="Synthesised Music", type="numpy"),
    ],
    title=title,
    description=description,
    theme='gstaff/xkcd',
).launch(debug=True)
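
# Note (a suggestion, not in the original Space): for long-running GPU
# inference, enabling Gradio's request queue avoids HTTP timeouts, e.g.
#   gr.Interface(...).queue().launch(debug=True)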