import os
import sys

import torch
from diffusers import StableDiffusionInpaintPipeline
from PIL import Image
from rembg import remove
from transformers import BlipProcessor, BlipForConditionalGeneration

# Make the bundled cloth-segmentation repo importable before importing from it.
sys.path.append(
    os.path.join(os.path.dirname(__file__), "huggingface-cloth-segmentation"))

from process import load_seg_model, get_palette, generate_mask

device = 'cpu'


def initialize_and_load_models():
    # Load the pretrained cloth-segmentation network from a local checkpoint.
    checkpoint_path = 'model/cloth_segm.pth'
    net = load_seg_model(checkpoint_path, device=device)
    return net


net = initialize_and_load_models()
palette = get_palette(4)


def run(img):
    # Segment the clothing in the image; generate_mask also writes per-class
    # alpha masks to ./huggingface-cloth-segmentation/output/alpha/.
    cloth_seg = generate_mask(img, net=net, palette=palette, device=device)
    return cloth_seg


def image_caption(image_path, img_type):
    # Caption an image with the BLIP-based FuseCap model.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    processor = BlipProcessor.from_pretrained("noamrot/FuseCap")
    model = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap").to(device)

    raw_image = Image.open(image_path).convert('RGB')
    if img_type == "dress":
        # Strip the background so the caption describes only the garment.
        raw_image = remove(raw_image)
        print("bg removed")
        raw_image.show()

    text = "a picture of "
    inputs = processor(raw_image, text, return_tensors="pt").to(device)

    out = model.generate(**inputs, num_beams=3)
    caption = processor.decode(out[0], skip_special_tokens=True)
    print(caption)
    return caption


def gen_vton(image_input, dress_input):
    # Inpainting pipeline used to redraw the masked clothing region.
    pipe = StableDiffusionInpaintPipeline.from_pretrained(
        "runwayml/stable-diffusion-inpainting",
        torch_dtype=torch.float32,
    )
    image_path = image_input

    img_open = Image.open(image_path)

    # Run cloth segmentation; it writes one alpha mask per detected class.
    run(img_open)
    gen_mask_1 = "./huggingface-cloth-segmentation/output/alpha/1.png"
    gen_mask_2 = "./huggingface-cloth-segmentation/output/alpha/2.png"
    gen_mask_3 = "./huggingface-cloth-segmentation/output/alpha/3.png"
    print("mask_generated")
    # Pick the first mask file that actually exists; a plain truth test on the
    # path strings would always select the first one.
    if os.path.exists(gen_mask_1):
        mask_path = gen_mask_1
    elif os.path.exists(gen_mask_2):
        mask_path = gen_mask_2
    else:
        mask_path = gen_mask_3

    dress_path = dress_input

    image = Image.open(image_path)
    mask = Image.open(mask_path)

    # The inpainting model expects 512x512 inputs.
    image = image.resize((512, 512))
    mask = mask.resize((512, 512))

    user_caption = image_caption(image_path, "user")
    dress_caption = image_caption(dress_path, "dress")
    print(user_caption)
    print(dress_caption)
    # NOTE: the prompt is currently hardcoded; the captions generated above
    # could be composed into it instead.
    prompt = "a human wearing a white long Sleeve Buttoned Down Blouse top with Gardenia Colors Polka Multi Dot"
    neg_prompt = "White sweater"

    guidance_scale = 7.5
    strength = 0.9  # the pipeline's denoising-strength parameter is named `strength`
    num_samples = 2
    generator = torch.Generator(device="cpu")

    images = pipe(
        prompt=prompt,
        negative_prompt=neg_prompt,
        image=image,
        mask_image=mask,
        guidance_scale=guidance_scale,
        strength=strength,
        generator=generator,
        num_images_per_prompt=num_samples,
    ).images

    os.makedirs("./processed_images", exist_ok=True)
    images[0].save("./processed_images/output_image.jpg")
    images[1].save("./processed_images/output_image_1.jpg")


def predict(inputs, prompt):
    # Inpaint a user-supplied image/mask pair: a dict holding PIL images under
    # 'image' and 'mask' keys (e.g. from an interactive sketch component).
    image = inputs['image'].convert("RGB").resize((512, 512))
    mask_image = inputs['mask'].convert("RGB").resize((512, 512))
    pipe = StableDiffusionInpaintPipeline.from_pretrained(
        "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float32)
    images = pipe(prompt=prompt, image=image, mask_image=mask_image).images
    return images[0]
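

# A minimal usage sketch, assuming the script is run directly. The input paths
# below are hypothetical placeholders, not files shipped with this project;
# point them at a real user photo and garment image before running.
if __name__ == "__main__":
    user_photo = "./inputs/person.jpg"    # hypothetical path
    garment_photo = "./inputs/dress.jpg"  # hypothetical path
    gen_vton(user_photo, garment_photo)
    print("results written to ./processed_images/")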