Spaces:

runa91
/

barc_gradio

Runtime error

barc_gradio / gradio_demo /barc_demo_v6.py

Nadine Rueegg

adjust print statements and close figures

6cdca0e over 2 years ago

12.7 kB

	# python gradio_demo/barc_demo_v6.py

	import os

	# Enumerate GPUs in PCI bus order and expose only device 0 to CUDA.
	os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
	os.environ["CUDA_VISIBLE_DEVICES"] = "0"

	# Install the pinned torch/torchvision builds before they are imported below.
	# os.system() does not raise when the command fails, it returns the exit
	# status, so the status is checked explicitly and a failure is reported.
	# Alternative pin that was tried:
	# os.system("pip install --upgrade torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html")
	try:
	    pip_exit_status = os.system("pip install --upgrade torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/cu101/torch_stable.html")
	except Exception as e:
	    print(e)
	else:
	    if pip_exit_status != 0:
	        print('pip install of torch/torchvision failed with exit status {}'.format(pip_exit_status))

	import numpy as np
	import os
	import glob
	import torch
	from torch.utils.data import DataLoader
	import torchvision
	from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
	import torchvision.transforms as T
	import cv2
	from matplotlib import pyplot as plt
	from PIL import Image
	import random
	from datetime import datetime
	import gradio as gr

	import sys
	sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../', 'src'))
	from stacked_hourglass.datasets.imgcropslist import ImgCrops
	from combined_model.train_main_image_to_3d_withbreedrel import do_visual_epoch
	from combined_model.model_shape_v7 import ModelImageTo3d_withshape_withproj
	from configs.barc_cfg_defaults import get_cfg_global_updated

	# Seed Python's RNG so that later random operations in this script are reproducible.
	random.seed(0)

	# Report the library versions that were actually imported.
	print("torch: ", torch.__version__, "\ntorchvision: ", torchvision.__version__)



	def get_prediction(model, img_path_or_img, confidence=0.5):
	    """
	    Run the detector on one image and keep the detections above a score threshold.

	    see https://haochen23.github.io/2020/04/object-detection-faster-rcnn.html#.YsMCm4TP3-g

	    parameters:
	      - model - detection model; it is called as model([img_tensor]) and must return a
	        list whose first element is a dict with the keys 'boxes', 'labels' and 'scores'
	      - img_path_or_img - path of the input image (str), or an already loaded image
	        that torchvision's ToTensor transform accepts
	      - confidence - threshold value for prediction score
	    returns:
	      - (pred_boxes, pred_class, pred_score) where pred_boxes is a list of
	        [(x0, y0), (x1, y1)] integer corner pairs and pred_class the matching label ids.
	        Both are cut after the last detection whose score is > confidence.
	        pred_score is the complete, uncut list of scores (unchanged behaviour).
	      - (None, None, None) if no detection has a score > confidence
	    """
	    if isinstance(img_path_or_img, str):
	        img = Image.open(img_path_or_img).convert('RGB')
	    else:
	        img = img_path_or_img
	    img = T.Compose([T.ToTensor()])(img)
	    # pure inference: no autograd graph is needed for the forward pass
	    with torch.no_grad():
	        pred = model([img])
	    detections = pred[0]
	    # detach + cpu so the conversion to numpy works independently of grad mode and device
	    pred_class = list(detections['labels'].detach().cpu().numpy())
	    pred_boxes = [[(int(b[0]), int(b[1])), (int(b[2]), int(b[3]))]
	                  for b in detections['boxes'].detach().cpu().numpy()]
	    pred_score = list(detections['scores'].detach().cpu().numpy())
	    # Positions of all detections above the threshold. Working on positions (instead of
	    # looking a score value up with list.index) stays correct when scores are tied.
	    confident = [idx for idx, score in enumerate(pred_score) if score > confidence]
	    if not confident:
	        print('no bounding box with a score that is high enough found! -> work on full image')
	        return None, None, None
	    # keep everything up to and including the last confident detection
	    # (presumably the detector returns its detections sorted by decreasing score)
	    n_keep = confident[-1] + 1
	    return pred_boxes[:n_keep], pred_class[:n_keep], pred_score


	def detect_object(model, img_path_or_img, confidence=0.5, rect_th=2, text_size=0.5, text_th=1):
	    """
	    Find the first detection with class id 18 and draw its bounding box.

	    see https://haochen23.github.io/2020/04/object-detection-faster-rcnn.html#.YsMCm4TP3-g

	    parameters:
	      - model - detection model, forwarded to get_prediction
	      - img_path_or_img - path of the input image (str) or an already loaded image array
	      - confidence - threshold value for prediction score
	      - rect_th - thickness of bounding box
	      - text_size - size of the class label text (currently unused, no text is drawn)
	      - text_th - thickness of the text (currently unused, no text is drawn)
	    method:
	      - prediction is obtained from get_prediction method
	      - the first detection with class id 18 gets a green rectangle drawn with opencv
	    returns:
	      - (img, bbox): the RGB image with the rectangle drawn on it, and the chosen box as
	        [(x0, y0), (x1, y1)], or None if there is no such detection.
	        If an image array was passed in, the rectangle is drawn onto that array in place.
	    """
	    # label id to look for; 18 is presumably 'dog' in torchvision's COCO category list
	    dog_class_id = 18
	    boxes, pred_cls, _ = get_prediction(model, img_path_or_img, confidence)
	    if isinstance(img_path_or_img, str):
	        img = cv2.imread(img_path_or_img)
	        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
	    else:
	        img = img_path_or_img
	    bbox = None
	    if boxes is not None:
	        for box, cls in zip(boxes, pred_cls):
	            if cls == dog_class_id:
	                cv2.rectangle(img, box[0], box[1], color=(0, 255, 0), thickness=rect_th)
	                bbox = box
	                # only the first matching detection is used
	                break
	    return img, bbox


	# -------------------------------------------------------------------------------------------------------------------- #
	# Faster R-CNN with pretrained weights, used to find the bounding box.
	# eval() returns the module itself, so the model can be created and put into inference mode in one go.
	model_bbox = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True).eval()

	def run_bbox_inference(input_image):
	    """
	    Run the bounding box detector on an image and save the visualisation.

	    parameters:
	      - input_image - image (or image path) that is handed to detect_object
	    returns:
	      - (img, bbox) exactly as returned by detect_object
	    side effects:
	      - writes the returned image to <ROOT_OUT_PATH>/gradio_examples/test2.png
	        (the file is overwritten on every call)
	    """
	    # load configs
	    cfg = get_cfg_global_updated()
	    out_dir = os.path.join(cfg.paths.ROOT_OUT_PATH, 'gradio_examples')
	    # do not rely on another part of the script having created the folder
	    os.makedirs(out_dir, exist_ok=True)
	    img, bbox = detect_object(model=model_bbox, img_path_or_img=input_image, confidence=0.5)
	    # plt.imsave writes the array directly to disk, so no pyplot figure has to be
	    # opened (and closed again) for it
	    plt.imsave(os.path.join(out_dir, 'test2.png'), img)
	    return img, bbox
	# -------------------------------------------------------------------------------------------------------------------- #


	# -------------------------------------------------------------------------------------------------------------------- #
	# load configs
	cfg = get_cfg_global_updated()
	# Select the hardware device to use for inference.
	device = 'cuda' if torch.cuda.is_available() else 'cpu'
	print('---> device: ' + device)
	# disable gradient calculations (globally, this script only runs inference).
	torch.set_grad_enabled(False)
	# prepare complete model, all hyper-parameters come from the config
	complete_model = ModelImageTo3d_withshape_withproj(
	    num_stage_comb=cfg.params.NUM_STAGE_COMB,
	    num_stage_heads=cfg.params.NUM_STAGE_HEADS,
	    num_stage_heads_pose=cfg.params.NUM_STAGE_HEADS_POSE,
	    trans_sep=cfg.params.TRANS_SEP,
	    arch=cfg.params.ARCH,
	    n_joints=cfg.params.N_JOINTS,
	    n_classes=cfg.params.N_CLASSES,
	    n_keyp=cfg.params.N_KEYP,
	    n_bones=cfg.params.N_BONES,
	    n_betas=cfg.params.N_BETAS,
	    n_betas_limbs=cfg.params.N_BETAS_LIMBS,
	    n_breeds=cfg.params.N_BREEDS,
	    n_z=cfg.params.N_Z,
	    image_size=cfg.params.IMG_SIZE,
	    silh_no_tail=cfg.params.SILH_NO_TAIL,
	    thr_keyp_sc=cfg.params.KP_THRESHOLD,
	    add_z_to_3d_input=cfg.params.ADD_Z_TO_3D_INPUT,
	    n_segbps=cfg.params.N_SEGBPS,
	    add_segbps_to_3d_input=cfg.params.ADD_SEGBPS_TO_3D_INPUT,
	    add_partseg=cfg.params.ADD_PARTSEG,
	    n_partseg=cfg.params.N_PARTSEG,
	    fix_flength=cfg.params.FIX_FLENGTH,
	    structure_z_to_betas=cfg.params.STRUCTURE_Z_TO_B,
	    structure_pose_net=cfg.params.STRUCTURE_POSE_NET,
	    nf_version=cfg.params.NF_VERSION,
	)
	# load trained model
	path_model_file_complete = os.path.join(cfg.paths.ROOT_CHECKPOINT_PATH, 'barc_complete', 'model_best.pth.tar')
	print(path_model_file_complete)
	# explicit check instead of `assert`: asserts are removed when Python runs with -O,
	# and the error should name the missing file
	if not os.path.isfile(path_model_file_complete):
	    raise FileNotFoundError('model checkpoint not found: {}'.format(path_model_file_complete))
	print('Loading model weights from file: {}'.format(path_model_file_complete))
	checkpoint_complete = torch.load(path_model_file_complete, map_location=device)
	state_dict_complete = checkpoint_complete['state_dict']
	# strict=False: keys that do not match between checkpoint and model are tolerated
	complete_model.load_state_dict(state_dict_complete, strict=False)
	complete_model = complete_model.to(device)
	# create path for output files (no error if the folder already exists)
	save_imgs_path = os.path.join(cfg.paths.ROOT_OUT_PATH, 'gradio_examples')
	os.makedirs(save_imgs_path, exist_ok=True)

	def run_barc_inference(input_image, bbox=None):
	    """
	    Predict the posed 3D mesh for one image and export it as a .glb file.

	    parameters:
	      - input_image - the single image that is wrapped into an ImgCrops dataset
	      - bbox - optional bounding box for the image; None means no box is passed on
	    returns:
	      - path of the exported .glb file (inside save_imgs_path)
	    """
	    bbox_list = None if bbox is None else [bbox]
	    # dataset + loader that contain exactly this one image
	    crop_dataset = ImgCrops(image_list=[input_image], bbox_list=bbox_list, dataset_mode='complete')
	    names = crop_dataset.test_name_list
	    loader = DataLoader(
	        crop_dataset,
	        batch_size=1,
	        shuffle=False,
	        num_workers=0,
	        pin_memory=True,
	        drop_last=False,
	    )
	    # run visual evaluation; images are not written to disk (save_imgs_path=None),
	    # the results are returned instead
	    results = do_visual_epoch(
	        loader,
	        complete_model,
	        device,
	        ImgCrops.DATA_INFO,
	        weight_dict=None,
	        acc_joints=ImgCrops.ACC_JOINTS,
	        save_imgs_path=None,
	        metrics='all',
	        test_name_list=names,
	        render_all=cfg.params.RENDER_ALL,
	        pck_thresh=cfg.params.PCK_THRESH,
	        return_results=True,
	    )
	    # 4x4 homogeneous transform: negate x and y, and shift z by +1
	    flip_xy_shift_z = [
	        [-1, 0, 0, 0],
	        [0, -1, 0, 0],
	        [0, 0, 1, 1],
	        [0, 0, 0, 1],
	    ]
	    posed_mesh = results[0]['mesh_posed']
	    posed_mesh.apply_transform(flip_xy_shift_z)
	    # export the transformed mesh next to the other demo outputs
	    glb_path = os.path.join(save_imgs_path, names[0] + '_z') + '.glb'
	    posed_mesh.export(file_obj=glb_path)
	    return glb_path
	# -------------------------------------------------------------------------------------------------------------------- #


	# number of requests handled since the script was started (only used for logging)
	total_count = 0


	def run_complete_inference(img_path_or_img, crop_choice):
	    """
	    Gradio callback: optional bounding box detection followed by BARC inference.

	    parameters:
	      - img_path_or_img - input image as array, or path of the input image (str)
	      - crop_choice - "input image is cropped" uses the image as it is; any other
	        value runs Faster R-CNN first to obtain a bounding box
	    returns:
	      - [result_gltf, result_gltf, output_interm_image_vis]: the path of the exported
	        3D model (twice, once for the viewer and once for the download) and the
	        intermediate image padded with a white image on the left and on the right
	    raises:
	      - ValueError if no image was provided
	    """
	    global total_count
	    total_count += 1
	    print(datetime.now().strftime("%d/%m/%Y %H:%M:%S") + ' total count: ' + str(total_count))
	    if img_path_or_img is None:
	        # fail with a clear message instead of an AttributeError further down
	        raise ValueError('no input image provided')
	    # depending on crop_choice: run faster r-cnn or take the input image directly
	    if crop_choice == "input image is cropped":
	        if isinstance(img_path_or_img, str):
	            img = cv2.imread(img_path_or_img)
	            output_interm_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
	        else:
	            output_interm_image = img_path_or_img
	        output_interm_bbox = None
	    else:
	        # work on a copy: the detector draws the bounding box into the image it gets
	        output_interm_image, output_interm_bbox = run_bbox_inference(img_path_or_img.copy())
	    # run barc inference
	    result_gltf = run_barc_inference(img_path_or_img, output_interm_bbox)
	    # add white border to image for nicer alignment
	    white = np.full_like(output_interm_image, 255)
	    output_interm_image_vis = np.concatenate((white, output_interm_image, white), axis=1)
	    return [result_gltf, result_gltf, output_interm_image_vis]




	########################################################################################################################

	# see: https://huggingface.co/spaces/radames/PIFu-Clothed-Human-Digitization/blob/main/PIFu/spaces.py

	# Markdown text that is passed to gr.Interface as the description of the demo.
	# The BibTeX title field is wrapped in braces so that the entry is well-formed.
	description = '''
	# BARC

	#### Project Page
	* https://barc.is.tue.mpg.de/

	#### Description
	This is a demo for BARC (Breed Augmented Regression using Classification).
	You can either submit a cropped image or choose the option to run a pretrained Faster R-CNN in order to obtain a bounding box.
	Please have a look at the examples below.
	<details>

	<summary>More</summary>

	#### Citation

	```
	@inproceedings{BARC:2022,
	title = {{BARC}: Learning to Regress {3D} Dog Shape from Images by Exploiting Breed Information},
	author = {Rueegg, Nadine and Zuffi, Silvia and Schindler, Konrad and Black, Michael J.},
	booktitle = {Proceedings IEEE Conf. on Computer Vision and Pattern Recognition (CVPR)},
	year = {2022}
	}
	```

	#### Image Sources (Examples)
	* Stanford extra image dataset
	* Images from google search engine
	* https://www.dogtrainingnation.com/wp-content/uploads/2015/02/keep-dog-training-sessions-short.jpg
	* https://thumbs.dreamstime.com/b/hund-und-seine-neue-hundeh%C3%BCtte-36757551.jpg
	* https://www.mydearwhippet.com/wp-content/uploads/2021/04/whippet-temperament-2.jpg
	* https://media.istockphoto.com/photos/ibizan-hound-at-the-shore-in-winter-picture-id1092705644?k=20&m=1092705644&s=612x612&w=0&h=ppwg92s9jI8GWnk22SOR_DWWNP8b2IUmLXSQmVey5Ss=


	</details>
	'''






	def find_example_images(image_dir):
	    """
	    Return the sorted paths of all .jpg and .png files directly inside image_dir.

	    parameters:
	      - image_dir - folder that is searched (not recursively)
	    returns:
	      - sorted list of paths; empty if the folder has no such files or does not exist
	    """
	    # the '*' wildcard is required, a plain '.jpg' pattern only matches a file named '.jpg'
	    return sorted(glob.glob(os.path.join(image_dir, '*.jpg')) + glob.glob(os.path.join(image_dir, '*.png')))


	example_images = find_example_images(os.path.join(os.path.dirname(__file__), '../', 'datasets', 'test_image_crops'))
	random.shuffle(example_images)
	# file names starting with 'z_' get the Faster R-CNN option as crop choice,
	# all other examples are used as already cropped images
	examples = [
	    [img_path,
	     "use Faster R-CNN to get a bounding box" if os.path.basename(img_path).startswith('z_') else "input image is cropped"]
	    for img_path in example_images
	]

	# input components: the image and the choice whether a bounding box has to be detected first
	# (alternative image input that was tried: gr.Image(type="filepath", label="Input Image"))
	demo_inputs = [
	    gr.Image(label="Input Image"),
	    gr.Radio(
	        ["input image is cropped", "use Faster R-CNN to get a bounding box"],
	        value="use Faster R-CNN to get a bounding box",
	        label="Crop Choice",
	    ),
	]
	# output components, in the order of the list returned by run_complete_inference
	demo_outputs = [
	    gr.Model3D(clear_color=[0.0, 0.0, 0.0, 0.0], label="3D Model"),
	    gr.File(label="Download 3D Model"),
	    gr.Image(label="Bounding Box (Faster R-CNN prediction)"),
	]

	demo = gr.Interface(
	    fn=run_complete_inference,
	    description=description,
	    inputs=demo_inputs,
	    outputs=demo_outputs,
	    examples=examples,
	    thumbnail="barc_thumbnail.png",
	    allow_flagging="never",
	    cache_examples=False,  # True
	    examples_per_page=14,
	)

	demo.launch()