reach-vb (HF staff) committed on
Commit 89d7ff4
1 Parent(s): 00be739

Create app.py

Files changed (1)
  1. app.py +42 -0
app.py ADDED
@@ -0,0 +1,42 @@
+ import gradio as gr
+ import librosa
+ import numpy as np
+ import torch
+
+ from diffusers import SpectrogramDiffusionPipeline, MidiProcessor
+
+ pipe = SpectrogramDiffusionPipeline.from_pretrained("google/music-spectrogram-diffusion")
+ pipe = pipe.to("cuda")
+ processor = MidiProcessor()
+
+
+ def predict(audio_file_pth):
+     # gr.Audio with type="numpy" expects (sample_rate, frames) or
+     # (sample_rate, (frames, channels)); mono frames are returned here.
+     output = pipe(processor(audio_file_pth.name)[:2])  # synthesise the first two encoded segments
+     audio = output.audios[0]
+
+     return (16000, audio.ravel())
+
+
+ title = "Music Spectrogram Diffusion: Multi-instrument Music Synthesis with Spectrogram Diffusion"
+
+ description = """
+ In this work, the authors focus on a middle ground of neural synthesizers that can generate audio from MIDI sequences with arbitrary combinations of instruments in real time.
+ This enables training on a wide range of transcription datasets with a single model, which in turn offers note-level control of composition and instrumentation across a wide range of instruments.
+
+ They use a simple two-stage process: MIDI to spectrograms with an encoder-decoder Transformer, then spectrograms to audio with a generative adversarial network (GAN) spectrogram inverter.
+ """
+
+
+ gr.Interface(
+     fn=predict,
+     inputs=[
+         gr.File(file_count="single", file_types=[".mid"]),
+     ],
+     outputs=[
+         gr.Audio(label="Synthesised Music", type="numpy"),
+     ],
+     title=title,
+     description=description,
+ ).launch(debug=True)