reach-vb (HF staff) committed on
Commit 89d7ff4
1 Parent(s): 00be739

Create app.py

Files changed (1)
  1. app.py +42 -0
app.py ADDED
@@ -0,0 +1,42 @@
+ import gradio as gr
+ import librosa
+ import numpy as np
+ import torch
+
+ from diffusers import SpectrogramDiffusionPipeline, MidiProcessor
+
+ pipe = SpectrogramDiffusionPipeline.from_pretrained("google/music-spectrogram-diffusion")
+ pipe = pipe.to("cuda")
+ processor = MidiProcessor()
+
+
+ def predict(audio_file_pth):
+     # gr.Audio with type="numpy" expects (sample_rate, frames) or
+     # (sample_rate, (frames, channels)); mono frames are returned here.
+     output = pipe(processor(audio_file_pth.name)[:2])  # synthesise the first two encoded segments
+     audio = output.audios[0]
+
+     return (16000, audio.ravel())
+
+
+ title = "Music Spectrogram Diffusion: Multi-instrument Music Synthesis with Spectrogram Diffusion"
+
+ description = """
+ In this work, the authors focus on a middle ground of neural synthesizers that can generate audio from MIDI sequences with arbitrary combinations of instruments in real time.
+ This enables training on a wide range of transcription datasets with a single model, which in turn offers note-level control of composition and instrumentation across a wide range of instruments.
+
+ They use a simple two-stage process: MIDI to spectrograms with an encoder-decoder Transformer, then spectrograms to audio with a generative adversarial network (GAN) spectrogram inverter.
+ """
+
+
+ gr.Interface(
+     fn=predict,
+     inputs=[
+         gr.File(file_count="single", file_types=[".mid"]),
+     ],
+     outputs=[
+         gr.Audio(label="Synthesised Music", type="numpy"),
+     ],
+     title=title,
+     description=description,
+ ).launch(debug=True)