Spaces:

runa91
/

barc_gradio

Runtime error

App Files Files Community

Nadine Rueegg committed on Jul 19, 2022

Commit

432392d

1 Parent(s): d847241

use gpu as default

Browse files

Files changed (3) hide show

app.py +0 -269
gradio_demo/barc_demo_v3.py +0 -289
src/configs/barc_cfg_defaults.py +1 -1

app.py DELETED Viewed

@@ -1,269 +0,0 @@
-# python gradio_demo/barc_demo_v3.py
-import numpy as np
-import os
-import glob
-import torch
-from torch.utils.data import DataLoader
-import torchvision
-from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
-import torchvision.transforms as T
-import cv2
-from matplotlib import pyplot as plt
-from PIL import Image
-import gradio as gr
-import sys
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../', 'src'))
-from stacked_hourglass.datasets.imgcropslist import ImgCrops
-from combined_model.train_main_image_to_3d_withbreedrel import do_visual_epoch
-from combined_model.model_shape_v7 import ModelImageTo3d_withshape_withproj
-from configs.barc_cfg_defaults import get_cfg_global_updated
-def get_prediction(model, img_path_or_img, confidence=0.5):
-    """
-    see https://haochen23.github.io/2020/04/object-detection-faster-rcnn.html#.YsMCm4TP3-g
-    get_prediction
-        parameters:
-        - img_path - path of the input image
-        - confidence - threshold value for prediction score
-        method:
-        - Image is obtained from the image path
-        - the image is converted to image tensor using PyTorch's Transforms
-        - image is passed through the model to get the predictions
-        - class, box coordinates are obtained, but only prediction score > threshold
-            are chosen.
-    """
-    if isinstance(img_path_or_img, str):
-        img = Image.open(img_path_or_img).convert('RGB')
-    else:
-        img = img_path_or_img
-    transform = T.Compose([T.ToTensor()])
-    img = transform(img)
-    pred = model([img])
-    # pred_class = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(pred[0]['labels'].numpy())]
-    pred_class = list(pred[0]['labels'].numpy())
-    pred_boxes = [[(int(i[0]), int(i[1])), (int(i[2]), int(i[3]))] for i in list(pred[0]['boxes'].detach().numpy())]
-    pred_score = list(pred[0]['scores'].detach().numpy())
-    try:
-        pred_t = [pred_score.index(x) for x in pred_score if x>confidence][-1]
-        pred_boxes = pred_boxes[:pred_t+1]
-        pred_class = pred_class[:pred_t+1]
-        return pred_boxes, pred_class, pred_score
-    except:
-        print('no bounding box with a score that is high enough found! -> work on full image')
-        return None, None, None
-def detect_object(model, img_path_or_img, confidence=0.5, rect_th=2, text_size=0.5, text_th=1):
-    """
-    see https://haochen23.github.io/2020/04/object-detection-faster-rcnn.html#.YsMCm4TP3-g
-    object_detection_api
-        parameters:
-        - img_path_or_img - path of the input image
-        - confidence - threshold value for prediction score
-        - rect_th - thickness of bounding box
-        - text_size - size of the class label text
-        - text_th - thichness of the text
-        method:
-        - prediction is obtained from get_prediction method
-        - for each prediction, bounding box is drawn and text is written
-            with opencv
-        - the final image is displayed
-    """
-    boxes, pred_cls, pred_scores = get_prediction(model, img_path_or_img, confidence)
-    if isinstance(img_path_or_img, str):
-        img = cv2.imread(img_path_or_img)
-        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-    else:
-        img = img_path_or_img
-    is_first = True
-    bbox = None
-    if boxes is not None:
-        for i in range(len(boxes)):
-            cls = pred_cls[i]
-            if cls == 18 and bbox is None:
-                cv2.rectangle(img, boxes[i][0], boxes[i][1],color=(0, 255, 0), thickness=rect_th)
-                # cv2.putText(img, pred_cls[i], boxes[i][0], cv2.FONT_HERSHEY_SIMPLEX, text_size, (0,255,0),thickness=text_th)
-                cv2.putText(img, str(pred_scores[i]), boxes[i][0], cv2.FONT_HERSHEY_SIMPLEX, text_size, (0,255,0),thickness=text_th)
-                bbox = boxes[i]
-    return img, bbox
-def run_bbox_inference(input_image):
-    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
-    model.eval()
-    out_path = os.path.join(cfg.paths.ROOT_OUT_PATH, 'gradio_examples', 'test2.png')
-    img, bbox = detect_object(model=model, img_path_or_img=input_image, confidence=0.5)
-    fig = plt.figure()   #  plt.figure(figsize=(20,30))
-    plt.imsave(out_path, img)
-    return img, bbox
-def run_barc_inference(input_image, bbox=None):
-    # load configs
-    cfg = get_cfg_global_updated()
-    model_file_complete = os.path.join(cfg.paths.ROOT_CHECKPOINT_PATH, 'barc_complete', 'model_best.pth.tar')
-    # Select the hardware device to use for inference.
-    if torch.cuda.is_available() and cfg.device=='cuda':
-        device = torch.device('cuda', torch.cuda.current_device())
-        # torch.backends.cudnn.benchmark = True
-    else:
-        device = torch.device('cpu')
-    path_model_file_complete = os.path.join(cfg.paths.ROOT_CHECKPOINT_PATH, model_file_complete)
-    # Disable gradient calculations.
-    torch.set_grad_enabled(False)
-    # prepare complete model
-    complete_model = ModelImageTo3d_withshape_withproj(
-        num_stage_comb=cfg.params.NUM_STAGE_COMB, num_stage_heads=cfg.params.NUM_STAGE_HEADS, \
-        num_stage_heads_pose=cfg.params.NUM_STAGE_HEADS_POSE, trans_sep=cfg.params.TRANS_SEP, \
-        arch=cfg.params.ARCH, n_joints=cfg.params.N_JOINTS, n_classes=cfg.params.N_CLASSES, \
-        n_keyp=cfg.params.N_KEYP, n_bones=cfg.params.N_BONES, n_betas=cfg.params.N_BETAS, n_betas_limbs=cfg.params.N_BETAS_LIMBS, \
-        n_breeds=cfg.params.N_BREEDS, n_z=cfg.params.N_Z, image_size=cfg.params.IMG_SIZE, \
-        silh_no_tail=cfg.params.SILH_NO_TAIL, thr_keyp_sc=cfg.params.KP_THRESHOLD, add_z_to_3d_input=cfg.params.ADD_Z_TO_3D_INPUT,
-        n_segbps=cfg.params.N_SEGBPS, add_segbps_to_3d_input=cfg.params.ADD_SEGBPS_TO_3D_INPUT, add_partseg=cfg.params.ADD_PARTSEG, n_partseg=cfg.params.N_PARTSEG, \
-        fix_flength=cfg.params.FIX_FLENGTH, structure_z_to_betas=cfg.params.STRUCTURE_Z_TO_B, structure_pose_net=cfg.params.STRUCTURE_POSE_NET,
-        nf_version=cfg.params.NF_VERSION)
-    # load trained model
-    print(path_model_file_complete)
-    assert os.path.isfile(path_model_file_complete)
-    print('Loading model weights from file: {}'.format(path_model_file_complete))
-    checkpoint_complete = torch.load(path_model_file_complete)
-    state_dict_complete = checkpoint_complete['state_dict']
-    complete_model.load_state_dict(state_dict_complete, strict=False)
-    complete_model = complete_model.to(device)
-    save_imgs_path = os.path.join(cfg.paths.ROOT_OUT_PATH, 'gradio_examples')
-    if not os.path.exists(save_imgs_path):
-        os.makedirs(save_imgs_path)
-    input_image_list = [input_image]
-    if bbox is not None:
-        input_bbox_list = [bbox]
-    else:
-        input_bbox_list = None
-    val_dataset = ImgCrops(image_list=input_image_list, bbox_list=input_bbox_list, dataset_mode='complete')
-    test_name_list = val_dataset.test_name_list
-    val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False,
-                            num_workers=0, pin_memory=True, drop_last=False)
-    # run visual evaluation
-    #   remark: take ACC_Joints and DATA_INFO from StanExt as this is the training dataset
-    all_results = do_visual_epoch(val_loader, complete_model, device,
-                        ImgCrops.DATA_INFO,
-                        weight_dict=None,
-                        acc_joints=ImgCrops.ACC_JOINTS,
-                        save_imgs_path=None, # save_imgs_path,
-                        metrics='all',
-                        test_name_list=test_name_list,
-                        render_all=cfg.params.RENDER_ALL,
-                        pck_thresh=cfg.params.PCK_THRESH,
-                        return_results=True)
-    mesh = all_results[0]['mesh_posed']
-    result_path = os.path.join(save_imgs_path, test_name_list[0] + '_z')
-    mesh.apply_transform([[-1, 0, 0, 0],
-                            [0, -1, 0, 0],
-                            [0, 0, 1, 1],
-                            [0, 0, 0, 1]])
-    mesh.export(file_obj=result_path + '.glb')
-    result_gltf = result_path + '.glb'
-    return [result_gltf, result_gltf]
-def run_complete_inference(input_image):
-    output_interm_image, output_interm_bbox = run_bbox_inference(input_image.copy())
-    print(output_interm_bbox)
-    # output_image = run_barc_inference(input_image)
-    output_image = run_barc_inference(input_image, output_interm_bbox)
-    return output_image
-# demo = gr.Interface(run_barc_inference, gr.Image(), "image")
-# demo = gr.Interface(run_complete_inference, gr.Image(), "image")
-# see: https://huggingface.co/spaces/radames/PIFu-Clothed-Human-Digitization/blob/main/PIFu/spaces.py
-description = '''
-# BARC (old)
-#### Project Page
-* https://barc.is.tue.mpg.de/
-#### Description
-This is a demo for BARC. While BARC is trained on image crops, this demo uses a pretrained Faster-RCNN in order to get bounding boxes for the dogs.
-To see your result you may have to wait a minute or two, please be paitient.
-<details>
-<summary>More</summary>
-#### Citation
-```
-@inproceedings{BARC:2022,
-    title = {BARC}: Learning to Regress {3D} Dog Shape from Images by Exploiting Breed Information,
-    author = {Rueegg, Nadine and Zuffi, Silvia and Schindler, Konrad and Black, Michael J.},
-    booktitle = {Proceedings IEEE Conf. on Computer Vision and Pattern Recognition (CVPR)},
-    year = {2022}
-}
-```
-</details>
-'''
-examples = sorted(glob.glob(os.path.join(os.path.dirname(__file__), '../', 'datasets', 'test_image_crops', '*.jpg')) + glob.glob(os.path.join(os.path.dirname(__file__), '../', 'datasets', 'test_image_crops', '*.png')))
-demo = gr.Interface(
-    fn=run_complete_inference,
-    description=description,
-    # inputs=gr.Image(type="filepath", label="Input Image"),
-    inputs=gr.Image(label="Input Image"),
-    outputs=[
-        gr.Model3D(
-            clear_color=[0.0, 0.0, 0.0, 0.0],  label="3D Model"),
-        gr.File(label="Download 3D Model")
-    ],
-    examples=examples,
-    thumbnail="barc_thumbnail.png",
-    allow_flagging="never",
-    cache_examples=True
-)
-demo.launch(share=True)

gradio_demo/barc_demo_v3.py DELETED Viewed

@@ -1,289 +0,0 @@
-# python gradio_demo/barc_demo_v3.py
-import os
-os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
-os.environ["CUDA_VISIBLE_DEVICES"]="0"
-try:
-    # os.system("pip install --upgrade  torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html")
-    os.system("pip install --upgrade  torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/cu101/torch_stable.html")
-except Exception as e:
-    print(e)
-import numpy as np
-import os
-import glob
-import torch
-from torch.utils.data import DataLoader
-import torchvision
-from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
-import torchvision.transforms as T
-import cv2
-from matplotlib import pyplot as plt
-from PIL import Image
-import gradio as gr
-import sys
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../', 'src'))
-from stacked_hourglass.datasets.imgcropslist import ImgCrops
-from combined_model.train_main_image_to_3d_withbreedrel import do_visual_epoch
-from combined_model.model_shape_v7 import ModelImageTo3d_withshape_withproj
-from configs.barc_cfg_defaults import get_cfg_global_updated
-print(
-    "torch: ", torch.__version__,
-    "\ntorchvision: ", torchvision.__version__,
-)
-# print("EnV", os.environ)
-def get_prediction(model, img_path_or_img, confidence=0.5):
-    """
-    see https://haochen23.github.io/2020/04/object-detection-faster-rcnn.html#.YsMCm4TP3-g
-    get_prediction
-        parameters:
-        - img_path - path of the input image
-        - confidence - threshold value for prediction score
-        method:
-        - Image is obtained from the image path
-        - the image is converted to image tensor using PyTorch's Transforms
-        - image is passed through the model to get the predictions
-        - class, box coordinates are obtained, but only prediction score > threshold
-            are chosen.
-    """
-    if isinstance(img_path_or_img, str):
-        img = Image.open(img_path_or_img).convert('RGB')
-    else:
-        img = img_path_or_img
-    transform = T.Compose([T.ToTensor()])
-    img = transform(img)
-    pred = model([img])
-    # pred_class = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(pred[0]['labels'].numpy())]
-    pred_class = list(pred[0]['labels'].numpy())
-    pred_boxes = [[(int(i[0]), int(i[1])), (int(i[2]), int(i[3]))] for i in list(pred[0]['boxes'].detach().numpy())]
-    pred_score = list(pred[0]['scores'].detach().numpy())
-    try:
-        pred_t = [pred_score.index(x) for x in pred_score if x>confidence][-1]
-        pred_boxes = pred_boxes[:pred_t+1]
-        pred_class = pred_class[:pred_t+1]
-        return pred_boxes, pred_class, pred_score
-    except:
-        print('no bounding box with a score that is high enough found! -> work on full image')
-        return None, None, None
-def detect_object(model, img_path_or_img, confidence=0.5, rect_th=2, text_size=0.5, text_th=1):
-    """
-    see https://haochen23.github.io/2020/04/object-detection-faster-rcnn.html#.YsMCm4TP3-g
-    object_detection_api
-        parameters:
-        - img_path_or_img - path of the input image
-        - confidence - threshold value for prediction score
-        - rect_th - thickness of bounding box
-        - text_size - size of the class label text
-        - text_th - thichness of the text
-        method:
-        - prediction is obtained from get_prediction method
-        - for each prediction, bounding box is drawn and text is written
-            with opencv
-        - the final image is displayed
-    """
-    boxes, pred_cls, pred_scores = get_prediction(model, img_path_or_img, confidence)
-    if isinstance(img_path_or_img, str):
-        img = cv2.imread(img_path_or_img)
-        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-    else:
-        img = img_path_or_img
-    is_first = True
-    bbox = None
-    if boxes is not None:
-        for i in range(len(boxes)):
-            cls = pred_cls[i]
-            if cls == 18 and bbox is None:
-                cv2.rectangle(img, boxes[i][0], boxes[i][1],color=(0, 255, 0), thickness=rect_th)
-                # cv2.putText(img, pred_cls[i], boxes[i][0], cv2.FONT_HERSHEY_SIMPLEX, text_size, (0,255,0),thickness=text_th)
-                cv2.putText(img, str(pred_scores[i]), boxes[i][0], cv2.FONT_HERSHEY_SIMPLEX, text_size, (0,255,0),thickness=text_th)
-                bbox = boxes[i]
-    return img, bbox
-def run_bbox_inference(input_image):
-    # load configs
-    cfg = get_cfg_global_updated()
-    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
-    model.eval()
-    out_path = os.path.join(cfg.paths.ROOT_OUT_PATH, 'gradio_examples', 'test2.png')
-    img, bbox = detect_object(model=model, img_path_or_img=input_image, confidence=0.5)
-    fig = plt.figure()   #  plt.figure(figsize=(20,30))
-    plt.imsave(out_path, img)
-    return img, bbox
-def run_barc_inference(input_image, bbox=None):
-    # load configs
-    cfg = get_cfg_global_updated()
-    model_file_complete = os.path.join(cfg.paths.ROOT_CHECKPOINT_PATH, 'barc_complete', 'model_best.pth.tar')
-    # Select the hardware device to use for inference.
-    '''if torch.cuda.is_available() and cfg.device=='cuda':
-        device = torch.device('cuda', torch.cuda.current_device())
-        # torch.backends.cudnn.benchmark = True
-    else:
-        device = torch.device('cpu')'''
-    device = 'cuda' if torch.cuda.is_available() else 'cpu'
-    print('----------------------> device: ')
-    print(device)
-    path_model_file_complete = os.path.join(cfg.paths.ROOT_CHECKPOINT_PATH, model_file_complete)
-    # Disable gradient calculations.
-    torch.set_grad_enabled(False)
-    # prepare complete model
-    complete_model = ModelImageTo3d_withshape_withproj(
-        num_stage_comb=cfg.params.NUM_STAGE_COMB, num_stage_heads=cfg.params.NUM_STAGE_HEADS, \
-        num_stage_heads_pose=cfg.params.NUM_STAGE_HEADS_POSE, trans_sep=cfg.params.TRANS_SEP, \
-        arch=cfg.params.ARCH, n_joints=cfg.params.N_JOINTS, n_classes=cfg.params.N_CLASSES, \
-        n_keyp=cfg.params.N_KEYP, n_bones=cfg.params.N_BONES, n_betas=cfg.params.N_BETAS, n_betas_limbs=cfg.params.N_BETAS_LIMBS, \
-        n_breeds=cfg.params.N_BREEDS, n_z=cfg.params.N_Z, image_size=cfg.params.IMG_SIZE, \
-        silh_no_tail=cfg.params.SILH_NO_TAIL, thr_keyp_sc=cfg.params.KP_THRESHOLD, add_z_to_3d_input=cfg.params.ADD_Z_TO_3D_INPUT,
-        n_segbps=cfg.params.N_SEGBPS, add_segbps_to_3d_input=cfg.params.ADD_SEGBPS_TO_3D_INPUT, add_partseg=cfg.params.ADD_PARTSEG, n_partseg=cfg.params.N_PARTSEG, \
-        fix_flength=cfg.params.FIX_FLENGTH, structure_z_to_betas=cfg.params.STRUCTURE_Z_TO_B, structure_pose_net=cfg.params.STRUCTURE_POSE_NET,
-        nf_version=cfg.params.NF_VERSION)
-    # load trained model
-    print(path_model_file_complete)
-    assert os.path.isfile(path_model_file_complete)
-    print('Loading model weights from file: {}'.format(path_model_file_complete))
-    checkpoint_complete = torch.load(path_model_file_complete, map_location=device)
-    state_dict_complete = checkpoint_complete['state_dict']
-    complete_model.load_state_dict(state_dict_complete, strict=False)
-    complete_model = complete_model.to(device)
-    save_imgs_path = os.path.join(cfg.paths.ROOT_OUT_PATH, 'gradio_examples')
-    if not os.path.exists(save_imgs_path):
-        os.makedirs(save_imgs_path)
-    input_image_list = [input_image]
-    if bbox is not None:
-        input_bbox_list = [bbox]
-    else:
-        input_bbox_list = None
-    val_dataset = ImgCrops(image_list=input_image_list, bbox_list=input_bbox_list, dataset_mode='complete')
-    test_name_list = val_dataset.test_name_list
-    val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False,
-                            num_workers=0, pin_memory=True, drop_last=False)
-    # run visual evaluation
-    #   remark: take ACC_Joints and DATA_INFO from StanExt as this is the training dataset
-    all_results = do_visual_epoch(val_loader, complete_model, device,
-                        ImgCrops.DATA_INFO,
-                        weight_dict=None,
-                        acc_joints=ImgCrops.ACC_JOINTS,
-                        save_imgs_path=None, # save_imgs_path,
-                        metrics='all',
-                        test_name_list=test_name_list,
-                        render_all=cfg.params.RENDER_ALL,
-                        pck_thresh=cfg.params.PCK_THRESH,
-                        return_results=True)
-    mesh = all_results[0]['mesh_posed']
-    result_path = os.path.join(save_imgs_path, test_name_list[0] + '_z')
-    mesh.apply_transform([[-1, 0, 0, 0],
-                            [0, -1, 0, 0],
-                            [0, 0, 1, 1],
-                            [0, 0, 0, 1]])
-    mesh.export(file_obj=result_path + '.glb')
-    result_gltf = result_path + '.glb'
-    return [result_gltf, result_gltf]
-def run_complete_inference(input_image):
-    output_interm_image, output_interm_bbox = run_bbox_inference(input_image.copy())
-    print(output_interm_bbox)
-    # output_image = run_barc_inference(input_image)
-    output_image = run_barc_inference(input_image, output_interm_bbox)
-    return output_image
-# demo = gr.Interface(run_barc_inference, gr.Image(), "image")
-# demo = gr.Interface(run_complete_inference, gr.Image(), "image")
-# see: https://huggingface.co/spaces/radames/PIFu-Clothed-Human-Digitization/blob/main/PIFu/spaces.py
-description = '''
-# BARC
-#### Project Page
-* https://barc.is.tue.mpg.de/
-#### Description
-This is a demo for BARC. While BARC is trained on image crops, this demo uses a pretrained Faster-RCNN in order to get bounding boxes for the dogs.
-To see your result you may have to wait a minute or two, please be paitient.
-<details>
-<summary>More</summary>
-#### Citation
-```
-@inproceedings{BARC:2022,
-    title = {BARC}: Learning to Regress {3D} Dog Shape from Images by Exploiting Breed Information,
-    author = {Rueegg, Nadine and Zuffi, Silvia and Schindler, Konrad and Black, Michael J.},
-    booktitle = {Proceedings IEEE Conf. on Computer Vision and Pattern Recognition (CVPR)},
-    year = {2022}
-}
-```
-</details>
-'''
-examples = sorted(glob.glob(os.path.join(os.path.dirname(__file__), '../', 'datasets', 'test_image_crops', '*.jpg')) + glob.glob(os.path.join(os.path.dirname(__file__), '../', 'datasets', 'test_image_crops', '*.png')))
-demo = gr.Interface(
-    fn=run_complete_inference,
-    description=description,
-    # inputs=gr.Image(type="filepath", label="Input Image"),
-    inputs=gr.Image(label="Input Image"),
-    outputs=[
-        gr.Model3D(
-            clear_color=[0.0, 0.0, 0.0, 0.0],  label="3D Model"),
-        gr.File(label="Download 3D Model")
-    ],
-    examples=examples,
-    thumbnail="barc_thumbnail.png",
-    allow_flagging="never",
-    cache_examples=False        # True
-)
-demo.launch()       # (share=True)

src/configs/barc_cfg_defaults.py CHANGED Viewed

@@ -8,7 +8,7 @@ abs_barc_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..
 _C = CN()
 _C.barc_dir = abs_barc_dir
-_C.device = 'cpu'       # 'cuda'
+_C.device = 'cuda'  # 'cpu'       # 'cuda'
 ## path settings
 _C.paths = CN()