Spaces: Running on Zero
lemonaddie committed: Update app_recon.py
app_recon.py CHANGED (+55 −32)
@@ -55,12 +55,18 @@ from torchvision.transforms import InterpolationMode
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-stable_diffusion_repo_path = "stabilityai/stable-diffusion-2-1-unclip"
-sd_image_variations_diffusers_path = 'lambdalabs/sd-image-variations-diffusers'
-vae = AutoencoderKL.from_pretrained(stable_diffusion_repo_path, subfolder='vae')
-scheduler = DDIMScheduler.from_pretrained(stable_diffusion_repo_path, subfolder='scheduler')
-image_encoder = CLIPVisionModelWithProjection.from_pretrained(sd_image_variations_diffusers_path, subfolder="image_encoder")
-feature_extractor = CLIPImageProcessor.from_pretrained(sd_image_variations_diffusers_path, subfolder="feature_extractor")
+# stable_diffusion_repo_path = "stabilityai/stable-diffusion-2-1-unclip"
+# sd_image_variations_diffusers_path = 'lambdalabs/sd-image-variations-diffusers'
+# vae = AutoencoderKL.from_pretrained(stable_diffusion_repo_path, subfolder='vae')
+# scheduler = DDIMScheduler.from_pretrained(stable_diffusion_repo_path, subfolder='scheduler')
+# image_encoder = CLIPVisionModelWithProjection.from_pretrained(sd_image_variations_diffusers_path, subfolder="image_encoder")
+# feature_extractor = CLIPImageProcessor.from_pretrained(sd_image_variations_diffusers_path, subfolder="feature_extractor")
+
+vae = AutoencoderKL.from_pretrained("./", subfolder='vae')
+scheduler = DDIMScheduler.from_pretrained("./", subfolder='scheduler')
+image_encoder = CLIPVisionModelWithProjection.from_pretrained("./", subfolder="image_encoder")
+feature_extractor = CLIPImageProcessor.from_pretrained("./", subfolder="feature_extractor")
+
 unet = UNet2DConditionModel.from_pretrained('.', subfolder="unet")
 
 pipe = DepthNormalEstimationPipeline(vae=vae,
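Note on the loading change: the Hub repo ids (stabilityai/stable-diffusion-2-1-unclip, lambdalabs/sd-image-variations-diffusers) are commented out and every component now comes from the Space's own checkout. A minimal sketch of the mechanism, assuming the repo root carries diffusers-style subfolders (vae/, scheduler/, and so on; the layout is inferred from the calls above, not stated in the diff):

# Sketch only: from_pretrained treats a local directory like a Hub repo id,
# and `subfolder` picks one component out of it.
from diffusers import AutoencoderKL, DDIMScheduler

vae = AutoencoderKL.from_pretrained("./", subfolder="vae")              # reads ./vae/
scheduler = DDIMScheduler.from_pretrained("./", subfolder="scheduler")  # reads ./scheduler/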
@@ -77,6 +83,16 @@ except:
 
 pipe = pipe.to(device)
 
+
+def scale_img(img):
+    width, height = img.size
+
+    if min(width, height) > 480:
+        scale = 480 / min(width, height)
+        img = img.resize((int(width*scale), int(scale*height)), Image.LANCZOS)
+
+    return img
+
 def sam_init():
     #sam_checkpoint = os.path.join(os.path.dirname(__file__), "sam_pt", "sam_vit_l_0b3195.pth")
     #model_type = "vit_l"
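The new scale_img helper caps the shorter side of the input at 480 px while keeping the aspect ratio; smaller images pass through unchanged. A quick usage sketch with dummy inputs:

from PIL import Image

def scale_img(img):  # as defined in the hunk above
    width, height = img.size
    if min(width, height) > 480:
        scale = 480 / min(width, height)
        img = img.resize((int(width*scale), int(scale*height)), Image.LANCZOS)
    return img

print(scale_img(Image.new("RGB", (1920, 1080))).size)  # (853, 480)
print(scale_img(Image.new("RGB", (320, 240))).size)    # (320, 240), untouched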
@@ -110,6 +126,7 @@ def sam_segment(predictor, input_image, *bbox_coords):
     torch.cuda.empty_cache()
     return Image.fromarray(out_image_bbox, mode='RGBA'), masks_bbox
 
+
 @spaces.GPU
 def depth_normal(img_path,
             denoising_steps,
@@ -124,6 +141,8 @@ def depth_normal(img_path,
 
     img = Image.open(img_path)
 
+    img = scale_img(img)
+
     pipe_out = pipe(
         img,
         denoising_steps=denoising_steps,
@@ -152,16 +171,14 @@ def depth_normal(img_path,
 
     return depth_colored, normal_colored, [depth_path, normal_path]
 
-@spaces.GPU
-def reconstruction(image, files):
-
-    torch.cuda.empty_cache()
 
-
+def seg_foreground(image_file):
+    img = Image.open(image_file)
 
-
+    img = scale_img(img)
 
-    image_rem = img.convert('RGBA')
+    image_rem = img.convert('RGBA') #
+    print("after resize ", image_rem.size)
     image_nobg = remove(image_rem, alpha_matting=True)
     arr = np.asarray(image_nobg)[:,:,-1]
     x_nonzero = np.nonzero(arr.sum(axis=0))
@@ -172,10 +189,21 @@ def reconstruction(image, files):
     y_max = int(y_nonzero[0].max())
     masked_image, mask = sam_segment(sam_predictor, img.convert('RGB'), x_min, y_min, x_max, y_max)
 
-    mask = mask[-1].
+    mask = Image.fromarray(np.array(mask[-1]).astype(np.uint8) * 255)
+
+    return masked_image, mask
+
+@spaces.GPU
+def reconstruction(mask, files):
+
+    torch.cuda.empty_cache()
+
+    mask = mask[:, :, 0] > 0.5
     depth_np = np.load(files[0])
     normal_np = np.load(files[1])
 
+    h, w, _ = np.shape(normal_np)
+
     dir_name = os.path.dirname(os.path.realpath(files[0]))
     mask_output_temp = mask
     name_base = os.path.splitext(os.path.basename(files[0]))[0][:-6]
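The mask now crosses a Gradio hop between the two handlers: seg_foreground returns it as a 0/255 PIL image, and reconstruction gets it back as a numpy array and re-thresholds the first channel. A minimal round-trip sketch, assuming the hidden gr.Image delivers an RGB uint8 array (Gradio's default numpy format):

import numpy as np
from PIL import Image

bool_mask = np.array([[True, False], [False, True]])          # stand-in for mask[-1]
mask_img = Image.fromarray(bool_mask.astype(np.uint8) * 255)  # as in seg_foreground

arr = np.array(mask_img.convert("RGB"))  # roughly what gr.Image hands over
recovered = arr[:, :, 0] > 0.5           # as in reconstruction
assert (recovered == bool_mask).all()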
@@ -193,7 +221,7 @@ def reconstruction(image, files):
 
     torch.cuda.empty_cache()
 
-    return obj_path,
+    return obj_path, [ply_path]
 
 def run_demo():
 
@@ -278,6 +306,8 @@ def run_demo():
                 depth = gr.Image(interactive=False, show_label=False)
             with gr.Column():
                 normal = gr.Image(interactive=False, show_label=False)
+            with gr.Column():
+                masked_image = gr.Image(interactive=False, label="Masked foreground.")
 
         with gr.Row():
             files = gr.Files(
@@ -287,29 +317,21 @@ def run_demo():
             )
 
         with gr.Row():
-            recon_btn = gr.Button('
-
+            recon_btn = gr.Button('Is there a salient foreground object? If yes, Click here to Reconstruct its 3D model.', variant='primary', interactive=True)
+
         with gr.Row():
-
-
-            with gr.Column():
-                reconstructed_3d = gr.Model3D(
-                    label = 'Bini post-processed 3D model', height=320, interactive=False,
+            reconstructed_3d = gr.Model3D(
+                label = 'Bini post-processed 3D model', interactive=False
             )
-            # reconstructed_3d = gr.Files(
-            #     label = "Bini post-processed 3D model (plyfile)",
-            #     elem_id = "download",
-            #     interactive=False,
-            # )
 
         with gr.Row():
             reconstructed_file = gr.Files(
                 label = "3D Mesh (plyfile)",
                 elem_id = "download",
-                interactive=False
+                interactive=False
             )
 
-
+        mask = gr.Image(interactive=False, label="Masked foreground.", visible=False)
         run_btn.click(fn=depth_normal,
                       inputs=[input_image, denoising_steps,
                               ensemble_size,
@@ -318,9 +340,10 @@ def run_demo():
                               domain],
                       outputs=[depth, normal, files]
                       )
-        recon_btn.click(fn=
-
-
+        recon_btn.click(fn=seg_foreground, inputs=[input_image], outputs=[masked_image, mask]
+                ).success(fn=reconstruction,
+                          inputs=[mask, files],
+                          outputs=[reconstructed_3d, reconstructed_file]
                       )
         demo.queue().launch(share=True, max_threads=80)
 
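The rewired button chains two events: .success() fires the second handler only if the first one returned without raising, and the invisible mask component is the hand-off between them. A toy sketch of the same pattern (all component names here are illustrative):

import gradio as gr

def stage_one(text):
    return text.upper(), text.lower()   # (visible output, hidden hand-off)

def stage_two(handoff):
    return f"stage two saw: {handoff}"

with gr.Blocks() as demo:
    inp = gr.Textbox(label="input")
    shown = gr.Textbox(label="stage one")
    hidden = gr.Textbox(visible=False)  # hand-off, like `mask` above
    final = gr.Textbox(label="stage two")
    btn = gr.Button("Run")
    # stage_two runs only when stage_one succeeds
    btn.click(fn=stage_one, inputs=[inp], outputs=[shown, hidden]
              ).success(fn=stage_two, inputs=[hidden], outputs=[final])

demo.queue()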