ZeyuXie committed on
Commit aca4b77 · verified · 1 Parent(s): 5f1d3d1

Update app.py

Files changed (1): app.py (+14 −2)
app.py CHANGED

@@ -4,8 +4,10 @@ import json
 import numpy as np
 import torch
 import soundfile as sf
+import gradio as gr
 from diffusers import DDPMScheduler
 from pico_model import PicoDiffusion, build_pretrained_models
+from audioldm.variational_autoencoder.autoencoder import AutoencoderKL
 
 class dotdict(dict):
     """dot.notation access to dictionary attributes"""
@@ -15,7 +17,11 @@ class dotdict(dict):
 
 class InferRunner:
     def __init__(self):
-        self.vae, _ = build_pretrained_models("audioldm-s-full")
+        vae_config = json.load(open("ckpts/ldm/vae_config.json".format(path)))
+        self.vae = AutoencoderKL(**vae_config).to(device)
+        vae_weights = torch.load("ckpts/ldm/pytorch_model_vae.bin".format(path), map_location=device)
+        self.vae.load_state_dict(vae_weights)
+
         train_args = dotdict(json.loads(open("ckpts/pico_model/summary.jsonl").readlines()[0]))
         self.pico_model = PicoDiffusion(
             scheduler_name=train_args.scheduler_name,
@@ -23,7 +29,7 @@ class InferRunner:
             snr_gamma=train_args.snr_gamma,
             freeze_text_encoder_ckpt="ckpts/laion_clap/630k-audioset-best.pt",
             diffusion_pt="ckpts/pico_model/diffusion.pt",
-        ).cuda().eval()
+        ).eval().to(device)
         self.scheduler = DDPMScheduler.from_pretrained(train_args.scheduler_name, subfolder="scheduler")
 
 def infer(caption, runner):
@@ -34,6 +40,12 @@ def infer(caption, runner):
     sf.write(f"synthesized/{caption}.wav", wave, samplerate=16000, subtype='PCM_16')
 
 infer_runner = InferRunner()
+if torch.cuda.is_available():
+    device = "cuda"
+    device_selection = "cuda:0"
+else:
+    device = "cpu"
+    device_selection = "cpu"
 
 with gr.Blocks() as demo:
     with gr.Row():
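Note: as committed, `device` is only assigned after `infer_runner = InferRunner()` has already run, and the stray `.format(path)` calls reference a `path` name that does not appear in the visible diff, so `InferRunner.__init__` would raise a NameError. A minimal sketch of how the same VAE-loading change could be ordered to avoid that, assuming the checkpoint paths and the `AutoencoderKL(**vae_config)` constructor shown in the diff:

import json
import torch
from audioldm.variational_autoencoder.autoencoder import AutoencoderKL

# Pick the device before any model is constructed, so __init__ can use it.
device = "cuda" if torch.cuda.is_available() else "cpu"

class InferRunner:
    def __init__(self):
        # Load the VAE config and weights from the checkpoint directory;
        # the paths are literal strings, so no str.format() call is needed.
        with open("ckpts/ldm/vae_config.json") as f:
            vae_config = json.load(f)
        self.vae = AutoencoderKL(**vae_config).to(device)
        vae_weights = torch.load("ckpts/ldm/pytorch_model_vae.bin", map_location=device)
        self.vae.load_state_dict(vae_weights)

infer_runner = InferRunner()  # device already exists at construction time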