Spaces:

yizhezhu
/

MoMA_demo

Paused

App Files Community

Kunpeng Song committed on May 21, 2024

Commit

ef3a17c

1 Parent(s): 338f71e

bg

Browse files

Files changed (5) hide show

app.py +5 -6
app_version1.py +52 -0
dataset_lib/dataset_eval_MoMA.py +7 -4
example_images/newImages/3_mask.jpg +0 -0
model_lib/modules.py +2 -2

app.py CHANGED Viewed

@@ -15,14 +15,14 @@ title = "MoMA"
 description = "This model has to run on GPU"
 article = "<p style='text-align: center'><a href='https://news.machinelearning.sg/posts/beautiful_profile_pics_remove_background_image_with_deeplabv3/'>Blog</a> | <a href='https://github.com/eugenesiow/practical-ml'>Github Repo</a></p>"
-def MoMA_demo(rgb, mask, subject, prompt):
     # move the input and model to GPU for speed if available
     with torch.no_grad():
-        generated_image = model.generate_images(rgb, mask, subject, prompt, strength=1.0, seed=2)
     return generated_image
-def inference(rgb, mask, subject, prompt):
-    result = MoMA_demo(rgb, mask, subject, prompt)
     return result
 seed_everything(0)
@@ -40,13 +40,12 @@ model = MoMA_main_modal(args).to(args.device, dtype=torch.float16)
 gr.Interface(
     inference,
     [gr.Image(type="pil", label="Input RGB"),
-     gr.Image(type="pil", label="Input Mask"),
      gr.Textbox(lines=1, label="subject"),
      gr.Textbox(lines=5, label="Prompt")],
     gr.Image(type="pil", label="Output"),
     title=title,
     description=description,
     article=article,
-    examples=[["example_images/newImages/3.jpg",'example_images/newImages/3_mask.jpg','car','A car in autumn with falling leaves.']],
     # enable_queue=True
 ).launch(debug=False)

 description = "This model has to run on GPU"
 article = "<p style='text-align: center'><a href='https://news.machinelearning.sg/posts/beautiful_profile_pics_remove_background_image_with_deeplabv3/'>Blog</a> | <a href='https://github.com/eugenesiow/practical-ml'>Github Repo</a></p>"
+def MoMA_demo(rgb, subject, prompt):
     # move the input and model to GPU for speed if available
     with torch.no_grad():
+        generated_image = model.generate_images(rgb, subject, prompt, strength=1.0, seed=2)
     return generated_image
+def inference(rgb, subject, prompt):
+    result = MoMA_demo(rgb, subject, prompt)
     return result
 seed_everything(0)
 gr.Interface(
     inference,
     [gr.Image(type="pil", label="Input RGB"),
      gr.Textbox(lines=1, label="subject"),
      gr.Textbox(lines=5, label="Prompt")],
     gr.Image(type="pil", label="Output"),
     title=title,
     description=description,
     article=article,
+    examples=[["example_images/newImages/3.jpg",'car','A car in autumn with falling leaves.']],
     # enable_queue=True
 ).launch(debug=False)

app_version1.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import gradio as gr
+import cv2
+import torch
+import numpy as np
+from torchvision import transforms
+import torch
+from pytorch_lightning import seed_everything
+from torchvision.utils import save_image
+from model_lib.modules import MoMA_main_modal
+from model_lib.utils import parse_args
+import os
+os.environ["CUDA_VISIBLE_DEVICES"]="0"
+title = "MoMA"
+description = "This model has to run on GPU"
+article = "<p style='text-align: center'><a href='https://news.machinelearning.sg/posts/beautiful_profile_pics_remove_background_image_with_deeplabv3/'>Blog</a> | <a href='https://github.com/eugenesiow/practical-ml'>Github Repo</a></p>"
+def MoMA_demo(rgb, mask, subject, prompt):
+    # move the input and model to GPU for speed if available
+    with torch.no_grad():
+        generated_image = model.generate_images(rgb, mask, subject, prompt, strength=1.0, seed=2)
+    return generated_image
+def inference(rgb, mask, subject, prompt):
+    result = MoMA_demo(rgb, mask, subject, prompt)
+    return result
+seed_everything(0)
+args = parse_args()
+#load MoMA from HuggingFace. Auto download
+model = MoMA_main_modal(args).to(args.device, dtype=torch.float16)
+################ change texture ##################
+# prompt = "A wooden sculpture of a car on the table."
+# generated_image = model.generate_images(rgb_path, mask_path, subject, prompt, strength=0.4, seed=4, return_mask=True)  # set strength to 0.4 for better prompt fidelity
+# save_image(generated_image,f"{args.output_path}/{subject}_{prompt}.jpg")
+gr.Interface(
+    inference,
+    [gr.Image(type="pil", label="Input RGB"),
+     gr.Image(type="pil", label="Input Mask"),
+     gr.Textbox(lines=1, label="subject"),
+     gr.Textbox(lines=5, label="Prompt")],
+    gr.Image(type="pil", label="Output"),
+    title=title,
+    description=description,
+    article=article,
+    examples=[["example_images/newImages/3.jpg",'example_images/newImages/3_mask.jpg','car','A car in autumn with falling leaves.']],
+    # enable_queue=True
+).launch(debug=False)

dataset_lib/dataset_eval_MoMA.py CHANGED Viewed

@@ -3,9 +3,14 @@ import numpy as np
 import torch
 from torchvision import transforms
 from llava.mm_utils import process_images, tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria
-def Dataset_evaluate_MoMA(rgb_path, prompt,subject, mask_path, moMA_main_modal):
     LLaVa_processor = moMA_main_modal.image_processor_llava
     llava_config = moMA_main_modal.model_llava.config
@@ -14,9 +19,7 @@ def Dataset_evaluate_MoMA(rgb_path, prompt,subject, mask_path, moMA_main_modal):
         transforms.Resize((512, 512)),
     ])
-    rgb_path, prompt,mask_path = rgb_path, prompt,mask_path
-    image_pil = rgb_path # Image.open(rgb_path)
-    mask_pil = mask_path # Image.open(mask_path)
     blip2_opt = prompt
     if transform is not None:

 import torch
 from torchvision import transforms
 from llava.mm_utils import process_images, tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria
+from rembg import remove
+def create_binary_mask(image):
+    grayscale = image.convert("L")
+    mask = grayscale.point(lambda x: 255 if x > 1 else 0, '1')
+    return mask
+def Dataset_evaluate_MoMA(image_pil, prompt,subject, moMA_main_modal):
     LLaVa_processor = moMA_main_modal.image_processor_llava
     llava_config = moMA_main_modal.model_llava.config
         transforms.Resize((512, 512)),
     ])
+    mask_pil = create_binary_mask(remove(image_pil)) # Image.open(mask_path)
     blip2_opt = prompt
     if transform is not None:

example_images/newImages/3_mask.jpg DELETED Viewed

Binary file (7.31 kB)

model_lib/modules.py CHANGED Viewed

@@ -136,8 +136,8 @@ class MoMA_main_modal(nn.Module):
     def reset(self):
         self.moMA_generator.reset_all()
-    def generate_images(self, rgb_path, mask_path, subject, prompt, strength=1.0, num=1, seed=0):
-        batch = Dataset_evaluate_MoMA(rgb_path, prompt, subject, mask_path,self)
         self.moMA_generator.set_selfAttn_strength(strength)
         with torch.cuda.amp.autocast(enabled=True, dtype=torch.float16, cache_enabled=True):

     def reset(self):
         self.moMA_generator.reset_all()
+    def generate_images(self, rgb_path, subject, prompt, strength=1.0, num=1, seed=0):
+        batch = Dataset_evaluate_MoMA(rgb_path, prompt, subject,self)
         self.moMA_generator.set_selfAttn_strength(strength)
         with torch.cuda.amp.autocast(enabled=True, dtype=torch.float16, cache_enabled=True):