IDM-VTON2

Runtime error

App Files Files Community

alexff91 committed on Aug 11, 2024

Commit

d89c58b

verified ·

1 Parent(s): 909b870

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -15

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import spaces
-# Define a function to initialize all CUDA-related imports and models
 def initialize_cuda_models():
     import torch
     import os
@@ -21,12 +21,14 @@ def initialize_cuda_models():
     base_path = 'yisol/IDM-VTON'
-    # Load all the models
     unet = UNet2DConditionModel.from_pretrained(
         base_path,
         subfolder="unet",
         torch_dtype=torch.float16,
-    )
     unet.requires_grad_(False)
     tokenizer_one = AutoTokenizer.from_pretrained(
@@ -48,32 +50,32 @@ def initialize_cuda_models():
         base_path,
         subfolder="text_encoder",
         torch_dtype=torch.float16,
-    )
     text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
         base_path,
         subfolder="text_encoder_2",
         torch_dtype=torch.float16,
-    )
     image_encoder = CLIPVisionModelWithProjection.from_pretrained(
         base_path,
         subfolder="image_encoder",
         torch_dtype=torch.float16,
-    )
     vae = AutoencoderKL.from_pretrained(
         base_path,
         subfolder="vae",
         torch_dtype=torch.float16,
-    )
     UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(
         base_path,
         subfolder="unet_encoder",
         torch_dtype=torch.float16,
-    )
-    # Initialize auxiliary models
     parsing_model = Parsing(0)
     openpose_model = OpenPose(0)
@@ -110,10 +112,10 @@ def initialize_cuda_models():
     pipe.unet_encoder = UNet_Encoder
-    return pipe, openpose_model, parsing_model, tensor_transform
-# Initialize models once
-pipe, openpose_model, parsing_model, tensor_transform = initialize_cuda_models()
 from PIL import Image
 import numpy as np
@@ -137,8 +139,7 @@ def pil_to_binary_mask(pil_image, threshold=0):
 @spaces.GPU
 def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed):
-    device = "cuda"
     openpose_model.preprocessor.body_estimation.model.to(device)
     pipe.to(device)
     pipe.unet_encoder.to(device)
@@ -173,7 +174,9 @@ def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denois
     human_img_arg = _apply_exif_orientation(human_img.resize((384, 512)))
     human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
-    args = apply_net.create_argument_parser().parse_args(('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
     pose_img = args.func(args, human_img_arg)
     pose_img = pose_img[:, :, ::-1]
     pose_img = Image.fromarray(pose_img).resize((768, 1024))

 import spaces
+# Function to initialize all CUDA-related imports and models
 def initialize_cuda_models():
     import torch
     import os
     base_path = 'yisol/IDM-VTON'
+    # Load all the models on GPU 0 when CUDA is available, otherwise on the CPU
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
     unet = UNet2DConditionModel.from_pretrained(
         base_path,
         subfolder="unet",
         torch_dtype=torch.float16,
+    ).to(device)
     unet.requires_grad_(False)
     tokenizer_one = AutoTokenizer.from_pretrained(
         base_path,
         subfolder="text_encoder",
         torch_dtype=torch.float16,
+    ).to(device)
     text_encoder_two = CLIPTextModelWithProjection.from_pretrained(
         base_path,
         subfolder="text_encoder_2",
         torch_dtype=torch.float16,
+    ).to(device)
     image_encoder = CLIPVisionModelWithProjection.from_pretrained(
         base_path,
         subfolder="image_encoder",
         torch_dtype=torch.float16,
+    ).to(device)
     vae = AutoencoderKL.from_pretrained(
         base_path,
         subfolder="vae",
         torch_dtype=torch.float16,
+    ).to(device)
     UNet_Encoder = UNet2DConditionModel_ref.from_pretrained(
         base_path,
         subfolder="unet_encoder",
         torch_dtype=torch.float16,
+    ).to(device)
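+    # Initialize auxiliary models (NOTE(review): the 0 argument looks like a GPU index rather than a CPU placement, and start_tryon later moves the OpenPose model to `device` - confirm)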
     parsing_model = Parsing(0)
     openpose_model = OpenPose(0)
     pipe.unet_encoder = UNet_Encoder
+    return pipe, openpose_model, parsing_model, tensor_transform, device
+# Initialize models and device once
+pipe, openpose_model, parsing_model, tensor_transform, device = initialize_cuda_models()
 from PIL import Image
 import numpy as np
 @spaces.GPU
 def start_tryon(dict, garm_img, garment_des, is_checked, is_checked_crop, denoise_steps, seed):
+    # Use the device selected at initialization (cuda:0 when CUDA is available, otherwise cpu)
     openpose_model.preprocessor.body_estimation.model.to(device)
     pipe.to(device)
     pipe.unet_encoder.to(device)
     human_img_arg = _apply_exif_orientation(human_img.resize((384, 512)))
     human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
+    args = apply_net.create_argument_parser().parse_args(
+        ('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda')
+    )
     pose_img = args.func(args, human_img_arg)
     pose_img = pose_img[:, :, ::-1]
     pose_img = Image.fromarray(pose_img).resize((768, 1024))