import json
import random
import time
from typing import Tuple

import gradio as gr
import torch

try:
    # Only on HuggingFace
    import spaces
    is_space_imported = True
except ImportError:
    is_space_imported = False

from tqdm import tqdm
from huggingface_hub import snapshot_download
from models import AudioDiffusion, DDPMScheduler
from audioldm.audio.stft import TacotronSTFT
from audioldm.variational_autoencoder import AutoencoderKL
from pydub import AudioSegment

# Old import
import numpy as np
import torch.nn.functional as F
from torchvision.transforms.functional import normalize
from huggingface_hub import hf_hub_download
from gradio_imageslider import ImageSlider
from briarmbg import BriaRMBG
import PIL
from PIL import Image

# Upper bound used when drawing a random seed.
max_64_bit_int = 2**63 - 1

# Automatic device detection
if torch.cuda.is_available():
    device_type = "cuda"
    device_selection = "cuda:0"
else:
    device_type = "cpu"
    device_selection = "cpu"


class Tango:
    """Text-to-audio pipeline assembled from a Hugging Face Hub snapshot.

    The snapshot is expected to contain three JSON configs
    (``vae_config.json``, ``stft_config.json``, ``main_config.json``) and
    three weight files (``pytorch_model_vae.bin``, ``pytorch_model_stft.bin``,
    ``pytorch_model_main.bin``).
    """

    def __init__(self, name="declare-lab/tango2", device=device_selection):
        """Download the snapshot ``name`` and load all sub-models on ``device``.

        Args:
            name: Hub repository id passed to ``snapshot_download``.
            device: Device the modules are moved to and the weights are
                mapped to while loading.
        """
        path = snapshot_download(repo_id=name)

        vae_config = self._load_json(path, "vae_config.json")
        stft_config = self._load_json(path, "stft_config.json")
        main_config = self._load_json(path, "main_config.json")

        self.vae = AutoencoderKL(**vae_config).to(device)
        self.stft = TacotronSTFT(**stft_config).to(device)
        self.model = AudioDiffusion(**main_config).to(device)

        vae_weights = torch.load("{}/pytorch_model_vae.bin".format(path), map_location=device)
        stft_weights = torch.load("{}/pytorch_model_stft.bin".format(path), map_location=device)
        main_weights = torch.load("{}/pytorch_model_main.bin".format(path), map_location=device)

        self.vae.load_state_dict(vae_weights)
        self.stft.load_state_dict(stft_weights)
        self.model.load_state_dict(main_weights)

        print("Successfully loaded checkpoint from:", name)

        self.vae.eval()
        self.stft.eval()
        self.model.eval()

        self.scheduler = DDPMScheduler.from_pretrained(main_config["scheduler_name"], subfolder="scheduler")

    @staticmethod
    def _load_json(directory, filename):
        """Parse ``directory/filename`` as JSON, closing the file afterwards."""
        with open("{}/{}".format(directory, filename), encoding="utf-8") as config_file:
            return json.load(config_file)

    def chunks(self, lst, n):
        """Yield successive ``n``-sized slices of ``lst`` (the last may be shorter)."""
        for i in range(0, len(lst), n):
            yield lst[i:i + n]

    def generate(self, prompt, steps=100, guidance=3, samples=1, disable_progress=True):
        """Generate audio for a single prompt string.

        Args:
            prompt: Text prompt; wrapped in a one-element list for the model.
            steps: Forwarded to ``self.model.inference``.
            guidance: Forwarded to ``self.model.inference``.
            samples: Forwarded to ``self.model.inference``.
            disable_progress: Forwarded to ``self.model.inference``.

        Returns:
            Whatever ``self.vae.decode_to_waveform`` returns for the decoded
            latents; callers index it per sample.
        """
        with torch.no_grad():
            latents = self.model.inference([prompt], self.scheduler, steps, guidance, samples, disable_progress=disable_progress)
            mel = self.vae.decode_first_stage(latents)
            wave = self.vae.decode_to_waveform(mel)
        return wave

    def generate_for_batch(self, prompts, steps=200, guidance=3, samples=1, batch_size=8, disable_progress=True):
        """Generate audio for a list of prompt strings, ``batch_size`` at a time.

        Returns:
            A flat list of waveforms when ``samples == 1``; otherwise a list
            of ``samples``-sized groups built with :meth:`chunks`.
        """
        outputs = []
        for k in tqdm(range(0, len(prompts), batch_size)):
            batch = prompts[k: k + batch_size]
            with torch.no_grad():
                latents = self.model.inference(batch, self.scheduler, steps, guidance, samples, disable_progress=disable_progress)
                mel = self.vae.decode_first_stage(latents)
                wave = self.vae.decode_to_waveform(mel)
                outputs.extend(wave)
        if samples == 1:
            return outputs
        return list(self.chunks(outputs, samples))


# Initialize TANGO: load on CPU first, then move each sub-model to the detected device.
tango = Tango(device="cpu")
tango.vae.to(device_type)
tango.stft.to(device_type)
tango.model.to(device_type)


def update_seed(is_randomize_seed, seed):
    """Return a fresh random seed when randomization is requested, else ``seed``."""
    if is_randomize_seed:
        return random.randint(0, max_64_bit_int)
    return seed


def check(
    prompt,
    output_number,
    steps,
    guidance,
    is_randomize_seed,
    seed
):
    """Validate the user inputs before generation.

    Only ``prompt`` and ``output_number`` are inspected; the remaining
    parameters are accepted but unused (presumably so the function can share
    the input list of ``text2audio`` -- confirm against the UI wiring).

    Raises:
        gr.Error: If the prompt is empty or ``output_number`` is not 1, 2 or 3.
    """
    if prompt is None or prompt == "":
        raise gr.Error("Please provide a prompt input.")
    if output_number not in (1, 2, 3):
        raise gr.Error("Please ask for 1, 2 or 3 output files.")


def update_output(output_format, output_number):
    """Build the component updates for the three outputs and the info message.

    Returns:
        Four ``gr.update`` objects: the three outputs get ``output_format``
        (the second is visible for 2+ outputs, the third only for 3), and the
        fourth component is hidden.
    """
    return [
        gr.update(format=output_format),
        gr.update(format=output_format, visible=(2 <= output_number)),
        gr.update(format=output_format, visible=(output_number == 3)),
        gr.update(visible=False)
    ]


def text2audio(
    prompt,
    output_number,
    steps,
    guidance,
    is_randomize_seed,
    seed
):
    """Generate up to three waveforms for ``prompt`` and report the elapsed time.

    Args:
        prompt: Text prompt forwarded to ``tango.generate``.
        output_number: Number of samples requested (1, 2 or 3).
        steps: Forwarded to ``tango.generate``.
        guidance: Forwarded to ``tango.generate``.
        is_randomize_seed: Unused here.
        seed: Seed for ``random`` and ``torch``; drawn at random when ``None``.

    Returns:
        A list of three waveform outputs (``None`` for the ones not requested)
        followed by a ``gr.update`` carrying the timing message.
    """
    start = time.time()

    if seed is None:
        seed = random.randint(0, max_64_bit_int)

    random.seed(seed)
    torch.manual_seed(seed)

    output_wave = tango.generate(prompt, steps, guidance, output_number)

    # 16000 is passed as the first element of the audio tuple (presumably the
    # sample rate in Hz -- confirm against the model's output rate).
    output_wave_1 = gr.make_waveform((16000, output_wave[0]))
    output_wave_2 = gr.make_waveform((16000, output_wave[1])) if (2 <= output_number) else None
    output_wave_3 = gr.make_waveform((16000, output_wave[2])) if (output_number == 3) else None

    # Split the whole-second duration into hours / minutes / seconds.
    elapsed = int(time.time() - start)
    minutes, seconds = divmod(elapsed, 60)
    hours, minutes = divmod(minutes, 60)

    message = (
        "Start again to get a different result. The output have been generated in "
        + ((str(hours) + " h, ") if hours != 0 else "")
        + ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "")
        + str(seconds) + " sec."
    )
    return [
        output_wave_1,
        output_wave_2,
        output_wave_3,
        gr.update(visible=True, value=message)
    ]


if is_space_imported:
    # Wrap the generation entry point with the Spaces GPU decorator when available.
    text2audio = spaces.GPU(text2audio, duration=420)

# Old code
net = BriaRMBG()
# Alternative weight source previously used here:
# hf_hub_download("briaai/RMBG-1.4", 'model.pth')
model_path = hf_hub_download("cocktailpeanut/gbmr", 'model.pth')

# Pick the best available backend once; `device` is reused by `process`.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# map_location keeps loading independent of the device the checkpoint was saved on.
net.load_state_dict(torch.load(model_path, map_location=device))
net = net.to(device)
net.eval()


def resize_image(image):
    """Return ``image`` converted to RGB and resized to 1024x1024 (bilinear)."""
    image = image.convert('RGB')
    model_input_size = (1024, 1024)
    image = image.resize(model_input_size, Image.BILINEAR)
    return image


def process(image):
    """Remove the background of ``image`` using the RMBG network.

    Args:
        image: Array accepted by ``Image.fromarray`` (what Gradio's ``"image"``
            input provides).

    Returns:
        An RGBA ``PIL.Image`` with the original pixels pasted through the
        predicted mask onto a fully transparent canvas.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        raise gr.Error("Please provide an image.")

    # prepare input
    orig_image = Image.fromarray(image)
    w, h = orig_image.size
    resized = resize_image(orig_image)
    im_np = np.array(resized)
    im_tensor = torch.tensor(im_np, dtype=torch.float32).permute(2, 0, 1)
    im_tensor = torch.unsqueeze(im_tensor, 0)
    im_tensor = torch.divide(im_tensor, 255.0)
    im_tensor = normalize(im_tensor, [0.5, 0.5, 0.5], [1.0, 1.0, 1.0])
    im_tensor = im_tensor.to(device)

    # inference -- no gradients are needed, so skip autograd bookkeeping
    with torch.no_grad():
        result = net(im_tensor)

    # post process: resize the first prediction back to the original size
    result = torch.squeeze(F.interpolate(result[0][0], size=(h, w), mode='bilinear'), 0)
    ma = torch.max(result)
    mi = torch.min(result)
    if ma > mi:
        result = (result - mi) / (ma - mi)
    else:
        # Flat mask: min-max scaling would divide by zero, so keep the raw
        # values bounded to [0, 1] instead of producing NaNs.
        result = torch.clamp(result, 0.0, 1.0)

    # image to pil
    im_array = (result * 255).detach().cpu().numpy().astype(np.uint8)
    pil_im = Image.fromarray(np.squeeze(im_array))

    # paste the mask on the original image
    new_im = Image.new("RGBA", pil_im.size, (0, 0, 0, 0))
    new_im.paste(orig_image, mask=pil_im)
    return new_im


# block = gr.Blocks().queue()
# with block:
#     gr.Markdown("## BRIA RMBG 1.4")
#     gr.HTML('''
#

# This is a demo for BRIA RMBG 1.4 that using # BRIA RMBG-1.4 image matting model as backbone. #

# ''')
#     with gr.Row():
#         with gr.Column():
#             input_image = gr.Image(sources=None, type="pil") # None for upload, ctrl+v and webcam
#             # input_image = gr.Image(sources=None, type="numpy") # None for upload, ctrl+v and webcam
#             run_button = gr.Button(value="Run")
#         with gr.Column():
#             result_gallery = gr.Gallery(label='Output', show_label=False, elem_id="gallery", columns=[1], height='auto')
#     ips = [input_image]
#     run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
# block.launch(debug = True)

# block = gr.Blocks().queue()

# NOTE(review): these two components are created at module level, outside any
# Blocks context visible here -- confirm whether they are still needed.
gr.Markdown("## BRIA RMBG 1.4")
gr.HTML('''

This is a demo for BRIA RMBG 1.4 that using BRIA RMBG-1.4 image matting model as backbone.

''')

# Texts and sample input shown by the background-removal interface.
title = "Background Removal"
description = r"""Background removal model developed by BRIA.AI, trained on a carefully selected dataset and is available as an open-source model for non-commercial use.
For test upload your image and wait. Read more at model card briaai/RMBG-1.4.
"""
examples = [["./input.jpg"]]

# output = ImageSlider(position=0.5,label='Image without background', type="pil", show_download_button=True)
# demo = gr.Interface(fn=process,inputs="image", outputs=output, examples=examples, title=title, description=description)

# Single image-in / image-out interface backed by `process`.
demo = gr.Interface(
    fn=process,
    inputs="image",
    outputs="image",
    examples=examples,
    title=title,
    description=description,
)

if __name__ == "__main__":
    demo.launch(share=False)