Spaces:
Runtime error
Runtime error
# python gradio_demo/barc_demo_v6.py
import os
import subprocess
import sys

# Restrict CUDA to device 0, with devices enumerated by PCI bus id. These must
# be set before torch is imported further down in this file.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Install the pinned torch/torchvision build at start-up, before torch is
# imported. The installation is best effort: a failure is reported but does
# not abort the script, so an already-installed torch can still be used.
# `sys.executable -m pip` guarantees the packages land in the interpreter that
# is running this script, and the exit status is checked explicitly because a
# failed pip run does not raise an exception by itself.
# Previously tried alternative pin (CUDA 11.3):
#   torch==1.11.0+cu113 torchvision==0.12.0+cu113
#   -f https://download.pytorch.org/whl/cu113/torch_stable.html
_pip_cmd = [
    sys.executable, "-m", "pip", "install", "--upgrade",
    "torch==1.6.0+cu101", "torchvision==0.7.0+cu101",
    "-f", "https://download.pytorch.org/whl/cu101/torch_stable.html",
]
try:
    _pip_status = subprocess.run(_pip_cmd).returncode
except OSError as e:
    # the interpreter / pip could not be launched at all
    print(e)
else:
    if _pip_status != 0:
        print('pip install of pinned torch/torchvision failed with exit status ' + str(_pip_status))
import numpy as np | |
import os | |
import glob | |
import torch | |
from torch.utils.data import DataLoader | |
import torchvision | |
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor | |
import torchvision.transforms as T | |
import cv2 | |
from matplotlib import pyplot as plt | |
from PIL import Image | |
import random | |
from datetime import datetime | |
import gradio as gr | |
import sys | |
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../', 'src')) | |
from stacked_hourglass.datasets.imgcropslist import ImgCrops | |
from combined_model.train_main_image_to_3d_withbreedrel import do_visual_epoch | |
from combined_model.model_shape_v7 import ModelImageTo3d_withshape_withproj | |
from configs.barc_cfg_defaults import get_cfg_global_updated | |
# Seed Python's RNG so that the random.shuffle of the example images further
# down produces the same order on every start.
random.seed(0)

# Report the versions of the libraries that were actually imported.
print("torch:  {} \ntorchvision:  {}".format(torch.__version__, torchvision.__version__))
def get_prediction(model, img_path_or_img, confidence=0.5):
    """
    Run an object detector on one image and keep the confident detections.

    see https://haochen23.github.io/2020/04/object-detection-faster-rcnn.html#.YsMCm4TP3-g

    parameters:
        - model - detection model; called as model([img_tensor]) and expected
          to return a list whose first entry is a dict with the keys
          'labels', 'boxes' and 'scores' (torch tensors)
        - img_path_or_img - path of the input image (str), an image accepted
          by torchvision's ToTensor, or an image tensor that is passed to the
          model unchanged
        - confidence - threshold value for prediction score
    returns:
        (pred_boxes, pred_class, pred_score)
        - pred_boxes - list of [(x0, y0), (x1, y1)] integer corner pairs
        - pred_class - list of class labels
          both are cut after the last detection whose score exceeds
          `confidence`
        - pred_score - scores of ALL detections (not cut)
        (None, None, None) is returned if no score exceeds `confidence`.
    """
    if isinstance(img_path_or_img, str):
        img = Image.open(img_path_or_img).convert('RGB')
    else:
        img = img_path_or_img
    # tensors are taken as they are, everything else is converted
    if not isinstance(img, torch.Tensor):
        img = T.Compose([T.ToTensor()])(img)

    # inference only: do not depend on a globally disabled autograd
    with torch.no_grad():
        pred = model([img])
    result = pred[0]

    # pred_class = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(pred[0]['labels'].numpy())]
    pred_class = list(result['labels'].detach().cpu().numpy())
    pred_boxes = [[(int(b[0]), int(b[1])), (int(b[2]), int(b[3]))]
                  for b in result['boxes'].detach().cpu().numpy()]
    pred_score = list(result['scores'].detach().cpu().numpy())

    # Position of the last detection above the threshold. Positions are used
    # instead of list.index(score) so that detections with identical scores
    # are not dropped.
    # NOTE(review): the slice below keeps everything up to that position, which
    # presumes the detector returns detections sorted by descending score.
    confident = [i for i, score in enumerate(pred_score) if score > confidence]
    if not confident:
        print('no bounding box with a score that is high enough found! -> work on full image')
        return None, None, None
    n_keep = confident[-1] + 1
    return pred_boxes[:n_keep], pred_class[:n_keep], pred_score
def detect_object(model, img_path_or_img, confidence=0.5, rect_th=2, text_size=0.5, text_th=1):
    """
    Detect the first object with class label 18 and draw its bounding box.

    see https://haochen23.github.io/2020/04/object-detection-faster-rcnn.html#.YsMCm4TP3-g

    parameters:
        - img_path_or_img - path of the input image, or the image itself
        - confidence - threshold value for prediction score
        - rect_th - thickness of bounding box
        - text_size - size of the class label text (currently unused, the
          text drawing is disabled)
        - text_th - thickness of the text (currently unused)
    method:
        - prediction is obtained from get_prediction method
        - the first prediction with class label 18 is drawn as a green
          rectangle with opencv
    returns:
        (img, bbox)
        - img - the RGB image with the rectangle drawn on it. If an image
          (not a path) was passed in, it is drawn on IN PLACE.
        - bbox - [(x0, y0), (x1, y1)] of the selected detection, or None if
          nothing suitable was detected
    """
    # NOTE(review): 18 is presumably the 'dog' entry of the COCO category list
    # used by torchvision's detection models - confirm.
    target_label = 18

    boxes, pred_cls, _ = get_prediction(model, img_path_or_img, confidence)

    if isinstance(img_path_or_img, str):
        # opencv loads BGR, the rest of the pipeline works with RGB
        img = cv2.cvtColor(cv2.imread(img_path_or_img), cv2.COLOR_BGR2RGB)
    else:
        img = img_path_or_img

    bbox = None
    if boxes is not None:
        for box, cls in zip(boxes, pred_cls):
            if cls == target_label:
                cv2.rectangle(img, box[0], box[1], color=(0, 255, 0), thickness=rect_th)
                bbox = box
                # only the first matching detection is used
                break
    return img, bbox
# -------------------------------------------------------------------------------------------------------------------- #
# Pretrained Faster R-CNN used for the bounding box detection. It is switched to
# evaluation mode right away (eval() returns the module itself).
model_bbox = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True).eval()
def run_bbox_inference(input_image):
    """
    Run the Faster R-CNN bounding box detection on one image.

    The image with the drawn bounding box is additionally written to
    <ROOT_OUT_PATH>/gradio_examples/test2.png (overwritten on every call).

    parameters:
        - input_image - path of the input image, or the image itself; passed
          on to detect_object, which draws on a passed-in image in place
    returns:
        (img, bbox) exactly as returned by detect_object
    """
    # load configs
    cfg = get_cfg_global_updated()
    out_dir = os.path.join(cfg.paths.ROOT_OUT_PATH, 'gradio_examples')
    # do not rely on the folder having been created somewhere else
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, 'test2.png')

    img, bbox = detect_object(model=model_bbox, img_path_or_img=input_image, confidence=0.5)

    # imsave writes the array directly, no pyplot figure is required
    plt.imsave(out_path, img)
    return img, bbox
# -------------------------------------------------------------------------------------------------------------------- #
# -------------------------------------------------------------------------------------------------------------------- #
# load configs
cfg = get_cfg_global_updated()

# Select the hardware device to use for inference.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('---> device: ' + device)

# disable gradient calculations (globally, this script only does inference).
torch.set_grad_enabled(False)

# prepare complete model, all hyper-parameters come from the config
complete_model = ModelImageTo3d_withshape_withproj(
    num_stage_comb=cfg.params.NUM_STAGE_COMB,
    num_stage_heads=cfg.params.NUM_STAGE_HEADS,
    num_stage_heads_pose=cfg.params.NUM_STAGE_HEADS_POSE,
    trans_sep=cfg.params.TRANS_SEP,
    arch=cfg.params.ARCH,
    n_joints=cfg.params.N_JOINTS,
    n_classes=cfg.params.N_CLASSES,
    n_keyp=cfg.params.N_KEYP,
    n_bones=cfg.params.N_BONES,
    n_betas=cfg.params.N_BETAS,
    n_betas_limbs=cfg.params.N_BETAS_LIMBS,
    n_breeds=cfg.params.N_BREEDS,
    n_z=cfg.params.N_Z,
    image_size=cfg.params.IMG_SIZE,
    silh_no_tail=cfg.params.SILH_NO_TAIL,
    thr_keyp_sc=cfg.params.KP_THRESHOLD,
    add_z_to_3d_input=cfg.params.ADD_Z_TO_3D_INPUT,
    n_segbps=cfg.params.N_SEGBPS,
    add_segbps_to_3d_input=cfg.params.ADD_SEGBPS_TO_3D_INPUT,
    add_partseg=cfg.params.ADD_PARTSEG,
    n_partseg=cfg.params.N_PARTSEG,
    fix_flength=cfg.params.FIX_FLENGTH,
    structure_z_to_betas=cfg.params.STRUCTURE_Z_TO_B,
    structure_pose_net=cfg.params.STRUCTURE_POSE_NET,
    nf_version=cfg.params.NF_VERSION)

# load trained model
path_model_file_complete = os.path.join(cfg.paths.ROOT_CHECKPOINT_PATH, 'barc_complete', 'model_best.pth.tar')
print(path_model_file_complete)
# explicit check instead of `assert`, which is removed when running with -O
if not os.path.isfile(path_model_file_complete):
    raise FileNotFoundError('model checkpoint not found: {}'.format(path_model_file_complete))
print('Loading model weights from file: {}'.format(path_model_file_complete))
# NOTE: torch.load unpickles the file, only load checkpoints from a trusted source
checkpoint_complete = torch.load(path_model_file_complete, map_location=device)
state_dict_complete = checkpoint_complete['state_dict']
# strict=False: keys that do not match between checkpoint and model are ignored
complete_model.load_state_dict(state_dict_complete, strict=False)
complete_model = complete_model.to(device)

# create path for output files
save_imgs_path = os.path.join(cfg.paths.ROOT_OUT_PATH, 'gradio_examples')
os.makedirs(save_imgs_path, exist_ok=True)
def run_barc_inference(input_image, bbox=None):
    """
    Predict the 3D mesh for a single image and export it as a .glb file.

    parameters:
        - input_image - the image, wrapped into a one-element ImgCrops dataset
        - bbox - optional bounding box handed to ImgCrops; None means that no
          bounding box list is passed
    returns:
        path of the exported file:
        <save_imgs_path>/<first test name>_z.glb
    """
    # prepare data loader (one image, optionally with its bounding box)
    bbox_list = None if bbox is None else [bbox]
    val_dataset = ImgCrops(image_list=[input_image], bbox_list=bbox_list, dataset_mode='complete')
    names = val_dataset.test_name_list
    val_loader = DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=0,
        pin_memory=True,
        drop_last=False,
    )

    # run visual evaluation; nothing is saved by do_visual_epoch itself
    # (save_imgs_path=None), the results are returned instead
    all_results = do_visual_epoch(
        val_loader, complete_model, device,
        ImgCrops.DATA_INFO,
        weight_dict=None,
        acc_joints=ImgCrops.ACC_JOINTS,
        save_imgs_path=None,
        metrics='all',
        test_name_list=names,
        render_all=cfg.params.RENDER_ALL,
        pck_thresh=cfg.params.PCK_THRESH,
        return_results=True,
    )

    # prepare output mesh: negate x and y and shift z by +1
    flip_xy_shift_z = [
        [-1, 0, 0, 0],
        [0, -1, 0, 0],
        [0, 0, 1, 1],
        [0, 0, 0, 1],
    ]
    mesh = all_results[0]['mesh_posed']
    mesh.apply_transform(flip_xy_shift_z)

    glb_path = os.path.join(save_imgs_path, names[0] + '_z') + '.glb'
    mesh.export(file_obj=glb_path)
    return glb_path
# -------------------------------------------------------------------------------------------------------------------- # | |
# number of requests that were processed since the script was started
total_count = 0


def run_complete_inference(img_path_or_img, crop_choice):
    """
    Gradio callback: optional bounding box detection followed by BARC.

    parameters:
        - img_path_or_img - path of the input image, or the image itself
        - crop_choice - "input image is cropped" uses the image as it is,
          any other value runs Faster R-CNN first to obtain a bounding box
    returns:
        [result_gltf, result_gltf, output_interm_image_vis]
        - result_gltf - path of the exported 3D model (returned twice: once
          for the 3D viewer and once for the download)
        - output_interm_image_vis - intermediate image (with the detected
          bounding box, if the detector was used) between two white images of
          the same size
    """
    global total_count
    total_count += 1
    dt_string = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
    print(dt_string + ' total count: ' + str(total_count))

    # depending on crop_choice: run faster r-cnn or take the input image directly
    if crop_choice == "input image is cropped":
        if isinstance(img_path_or_img, str):
            img = cv2.imread(img_path_or_img)
            output_interm_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        else:
            output_interm_image = img_path_or_img
        output_interm_bbox = None
    else:
        # The detector draws the bounding box onto a passed-in image, so it
        # gets its own copy and BARC still sees the clean image. A path has
        # nothing to copy (and no .copy method), it is forwarded unchanged.
        if isinstance(img_path_or_img, str):
            detector_input = img_path_or_img
        else:
            detector_input = img_path_or_img.copy()
        output_interm_image, output_interm_bbox = run_bbox_inference(detector_input)

    # run barc inference
    result_gltf = run_barc_inference(img_path_or_img, output_interm_bbox)

    # add white border to image for nicer alignment
    white = 255 * np.ones_like(output_interm_image)
    output_interm_image_vis = np.concatenate((white, output_interm_image, white), axis=1)
    return [result_gltf, result_gltf, output_interm_image_vis]
########################################################################################################################
# see: https://huggingface.co/spaces/radames/PIFu-Clothed-Human-Digitization/blob/main/PIFu/spaces.py
# Markdown text that is handed to gr.Interface as the description of the demo.
description = '''
# BARC
#### Project Page
* https://barc.is.tue.mpg.de/
#### Description
This is a demo for BARC (*B*reed *A*ugmented *R*egression using *C*lassification).
You can either submit a cropped image or choose the option to run a pretrained Faster R-CNN in order to obtain a bounding box.
Please have a look at the examples below.
<details>
<summary>More</summary>
#### Citation
```
@inproceedings{BARC:2022,
title = {BARC}: Learning to Regress {3D} Dog Shape from Images by Exploiting Breed Information,
author = {Rueegg, Nadine and Zuffi, Silvia and Schindler, Konrad and Black, Michael J.},
booktitle = {Proceedings IEEE Conf. on Computer Vision and Pattern Recognition (CVPR)},
year = {2022}
}
```
#### Image Sources (Examples)
* Stanford extra image dataset
* Images from google search engine
* https://www.dogtrainingnation.com/wp-content/uploads/2015/02/keep-dog-training-sessions-short.jpg
* https://thumbs.dreamstime.com/b/hund-und-seine-neue-hundeh%C3%BCtte-36757551.jpg
* https://www.mydearwhippet.com/wp-content/uploads/2021/04/whippet-temperament-2.jpg
* https://media.istockphoto.com/photos/ibizan-hound-at-the-shore-in-winter-picture-id1092705644?k=20&m=1092705644&s=612x612&w=0&h=ppwg92s9jI8GWnk22SOR_DWWNP8b2IUmLXSQmVey5Ss=
</details>
'''
# Collect the example images (*.jpg and *.png) from ../datasets/test_image_crops,
# sort them by path and then shuffle them (the RNG is seeded at the top of the file).
_example_dir = os.path.join(os.path.dirname(__file__), '../', 'datasets', 'test_image_crops')
example_images = sorted(
    glob.glob(os.path.join(_example_dir, '*.jpg'))
    + glob.glob(os.path.join(_example_dir, '*.png'))
)
random.shuffle(example_images)

# Every example is an [image path, crop choice] pair: file names that start
# with 'z_' go through Faster R-CNN, all other images are taken as cropped.
examples = []
for example_path in example_images:
    if os.path.basename(example_path).startswith('z_'):
        crop_choice_for_example = "use Faster R-CNN to get a bounding box"
    else:
        crop_choice_for_example = "input image is cropped"
    examples.append([example_path, crop_choice_for_example])
# The two values of the "Crop Choice" radio button; run_complete_inference
# compares its crop_choice argument against the first one.
_crop_choices = ["input image is cropped", "use Faster R-CNN to get a bounding box"]

# input widgets: the image and the crop choice (Faster R-CNN is preselected)
# alternative image input: gr.Image(type="filepath", label="Input Image")
_demo_inputs = [
    gr.Image(label="Input Image"),
    gr.Radio(_crop_choices, value=_crop_choices[1], label="Crop Choice"),
]

# output widgets, in the order of the list returned by run_complete_inference
_demo_outputs = [
    gr.Model3D(clear_color=[0.0, 0.0, 0.0, 0.0], label="3D Model"),
    gr.File(label="Download 3D Model"),
    gr.Image(label="Bounding Box (Faster R-CNN prediction)"),
]

demo = gr.Interface(
    fn=run_complete_inference,
    description=description,
    inputs=_demo_inputs,
    outputs=_demo_outputs,
    examples=examples,
    thumbnail="barc_thumbnail.png",
    allow_flagging="never",
    cache_examples=False,  # True
    examples_per_page=14,
)

demo.launch()