Spaces:

schibsted-presplit
/

Facial_Recognition_with_Sentiment_Detector

Running

App Files Files Community

Facial_Recognition_with_Sentiment_Detector / utils.py

drsaikirant88

initial commit with working code (local)

b202543 about 2 years ago

raw

history blame contribute delete

8.05 kB

	# PyTorch implementation of Darknet
	# This is a custom, hard-coded version of darknet with
	# YOLOv3 implementation for openimages database. This
	# was written to test viability of implementing YOLO
	# for face detection followed by emotion / sentiment
	# analysis.
	#
	# Configuration, weights and data are hardcoded.
	# Additional options include the ability to create a
	# subset of data with faces extracted for labelling.
	#
	# Author : Saikiran Tharimena
	# Co-Authors: Kjetil Marinius Sjulsen, Juan Carlos Calvet Lopez
	# Project : Emotion / Sentiment Detection from news images
	# Date : 12 September 2022
	# Version : v0.1
	#
	# (C) Schibsted ASA

	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	from torch.autograd import Variable
	import numpy as np
	import cv2


	def unique(tensor):
	    """Return the sorted distinct values of ``tensor`` as a 1-D tensor.

	    The result has the same dtype and lives on the same device as the
	    input. It is detached from the autograd graph, so tensors that
	    require grad are accepted as well.
	    """
	    # torch.unique flattens the input and sorts by default, which matches
	    # np.unique without the device -> CPU -> NumPy -> device round trip.
	    return torch.unique(tensor.detach())


	def bbox_iou(box1, box2):
	    """Return the IoU (intersection over union) of two sets of boxes.

	    Both arguments are 2-D tensors whose first four columns are the corner
	    coordinates ``x1, y1, x2, y2``; any further columns are ignored. Rows
	    are compared element-wise with broadcasting, so a single box of shape
	    ``(1, k)`` can be compared against ``n`` boxes of shape ``(n, k)``.

	    Widths and heights are computed as ``x2 - x1 + 1`` / ``y2 - y1 + 1``
	    (inclusive pixel coordinates).

	    Returns:
	        1-D tensor with one IoU value per (broadcast) row.
	    """
	    # Coordinates of both sets of bounding boxes.
	    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
	    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

	    # Coordinates of the intersection rectangle.
	    inter_rect_x1 = torch.max(b1_x1, b2_x1)
	    inter_rect_y1 = torch.max(b1_y1, b2_y1)
	    inter_rect_x2 = torch.min(b1_x2, b2_x2)
	    inter_rect_y2 = torch.min(b1_y2, b2_y2)

	    # Intersection area; clamping at 0 handles boxes that do not overlap.
	    inter_w = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0)
	    inter_h = torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0)
	    inter_area = inter_w * inter_h

	    # Union area = sum of both areas minus the part counted twice.
	    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
	    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

	    return inter_area / (b1_area + b2_area - inter_area)


	def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA = True):
	    """Turn a raw detection feature map into a flat table of boxes.

	    Args:
	        prediction: 4-D tensor that is viewed as
	            ``(batch, (5 + num_classes) * len(anchors), grid * grid)``;
	            its size along dim 2 is taken as the grid size.
	        inp_dim: side length of the network input; the stride is
	            ``inp_dim // prediction.size(2)``.
	        anchors: sequence of ``(width, height)`` anchor pairs, expressed in
	            input-image units (they are divided by the stride here).
	        num_classes: number of class scores per box.
	        CUDA: kept for backward compatibility. Helper tensors are created
	            on the device of ``prediction``, so the flag no longer needs to
	            match where the data lives.

	    Returns:
	        Tensor of shape ``(batch, grid * grid * len(anchors), 5 + num_classes)``
	        with columns: centre x, centre y, width, height (all scaled by the
	        stride), objectness, then the class scores.
	    """
	    batch_size = prediction.size(0)
	    stride = inp_dim // prediction.size(2)
	    grid_size = inp_dim // stride
	    bbox_attrs = 5 + num_classes
	    num_anchors = len(anchors)
	    num_cells = grid_size * grid_size

	    # (batch, attrs * anchors, cells) -> (batch, cells * anchors, attrs)
	    prediction = prediction.view(batch_size, bbox_attrs * num_anchors, num_cells)
	    prediction = prediction.transpose(1, 2).contiguous()
	    prediction = prediction.view(batch_size, num_cells * num_anchors, bbox_attrs)

	    # Sigmoid the centre x, centre y and the object confidence.
	    prediction[:, :, 0] = torch.sigmoid(prediction[:, :, 0])
	    prediction[:, :, 1] = torch.sigmoid(prediction[:, :, 1])
	    prediction[:, :, 4] = torch.sigmoid(prediction[:, :, 4])

	    # Add the grid-cell offsets to the centres. x varies fastest within a
	    # row of cells, y stays constant along a row (same layout as
	    # np.meshgrid(grid, grid) flattened).
	    cells = torch.arange(grid_size, dtype=prediction.dtype, device=prediction.device)
	    x_offset = cells.repeat(grid_size).view(-1, 1)
	    y_offset = cells.repeat_interleave(grid_size).view(-1, 1)
	    x_y_offset = torch.cat((x_offset, y_offset), 1).repeat(1, num_anchors).view(-1, 2).unsqueeze(0)
	    prediction[:, :, :2] += x_y_offset

	    # Log-space transform of width and height: exp(t) * anchor, with the
	    # anchors expressed in grid units.
	    scaled_anchors = prediction.new_tensor([(a[0] / stride, a[1] / stride) for a in anchors])
	    scaled_anchors = scaled_anchors.repeat(num_cells, 1).unsqueeze(0)
	    prediction[:, :, 2:4] = torch.exp(prediction[:, :, 2:4]) * scaled_anchors

	    # Class scores are independent sigmoids.
	    prediction[:, :, 5:5 + num_classes] = torch.sigmoid(prediction[:, :, 5:5 + num_classes])

	    # Rescale box geometry from grid units back to input-image units.
	    prediction[:, :, :4] *= stride

	    return prediction


	def write_results(prediction, confidence, num_classes, nms_conf = 0.4):
	    """Threshold detections by objectness and apply per-class NMS.

	    Args:
	        prediction: tensor indexed as ``[batch, box, attribute]`` where the
	            attributes are centre x, centre y, width, height, objectness
	            and then ``num_classes`` class scores.
	        confidence: boxes whose objectness is not strictly greater than
	            this value are discarded.
	        num_classes: number of class-score columns.
	        nms_conf: IoU threshold; within one class, a box is dropped when its
	            IoU with a higher-objectness box is not below this value.

	    Returns:
	        A tensor with 8 columns per surviving detection -- batch index,
	        x1, y1, x2, y2, objectness, best class score, best class index --
	        or the integer ``0`` when nothing was detected in the whole batch.
	    """
	    # Zero out every box whose objectness does not clear the threshold.
	    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
	    prediction = prediction * conf_mask

	    # Convert (centre x, centre y, width, height) to corner coordinates.
	    centres = prediction[:, :, :2].clone()
	    half_sizes = prediction[:, :, 2:4] / 2
	    prediction[:, :, :2] = centres - half_sizes
	    prediction[:, :, 2:4] = centres + half_sizes

	    detections = []

	    for ind in range(prediction.size(0)):
	        image_pred = prediction[ind]

	        # Keep only the best class per box: its score and its index.
	        max_conf, max_conf_idx = torch.max(image_pred[:, 5:5 + num_classes], 1)
	        image_pred = torch.cat(
	            (
	                image_pred[:, :5],
	                max_conf.float().unsqueeze(1),
	                max_conf_idx.float().unsqueeze(1),
	            ),
	            1,
	        )

	        # Drop the boxes that were zeroed by the objectness threshold.
	        image_pred_ = image_pred[image_pred[:, 4] != 0]
	        if image_pred_.shape[0] == 0:
	            continue

	        # Classes detected in this image (last column is the class index).
	        for cls in unique(image_pred_[:, -1]):
	            # Detections of this class that have a non-zero class score.
	            class_rows = (image_pred_[:, -1] == cls) & (image_pred_[:, -2] != 0)
	            image_pred_class = image_pred_[class_rows]

	            # Highest objectness first.
	            order = torch.sort(image_pred_class[:, 4], descending=True)[1]
	            image_pred_class = image_pred_class[order]

	            # Greedy NMS: for each kept box, discard every later box that
	            # overlaps it too much. The tensor shrinks as we go, hence the
	            # size is re-read on every iteration.
	            i = 0
	            while i + 1 < image_pred_class.size(0):
	                ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i + 1:])
	                keep = ious < nms_conf
	                image_pred_class = torch.cat(
	                    (image_pred_class[:i + 1], image_pred_class[i + 1:][keep])
	                )
	                i += 1

	            # Prefix every detection with the index of its image in the batch.
	            batch_ind = image_pred_class.new_full((image_pred_class.size(0), 1), ind)
	            detections.append(torch.cat((batch_ind, image_pred_class), 1))

	    # Callers rely on the integer 0 as the "no detections" sentinel.
	    if not detections:
	        return 0
	    return torch.cat(detections)


	def letterbox_image(img, inp_dim):
	    """Resize ``img`` with unchanged aspect ratio, padding the rest.

	    Args:
	        img: image array indexed as ``[row, column, channel]`` with three
	            channels.
	        inp_dim: ``(width, height)`` of the output canvas.

	    Returns:
	        Array of shape ``(height, width, 3)`` filled with the value 128 and
	        holding the resized image centred inside it.
	    """
	    img_h, img_w = img.shape[0], img.shape[1]
	    w, h = inp_dim

	    # One scale factor for both axes keeps the aspect ratio. Clamp to one
	    # pixel so very elongated images never ask cv2.resize for a zero size.
	    scale = min(w / img_w, h / img_h)
	    new_w = max(1, int(img_w * scale))
	    new_h = max(1, int(img_h * scale))
	    resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

	    # NOTE: without an explicit dtype np.full uses NumPy's default integer
	    # type for the canvas; kept as-is so the return dtype is unchanged.
	    canvas = np.full((h, w, 3), 128)

	    top = (h - new_h) // 2
	    left = (w - new_w) // 2
	    canvas[top:top + new_h, left:left + new_w, :] = resized_image

	    return canvas


	def prep_image(img, inp_dim):
	    """Prepare an image for input to the neural network.

	    The image is letterboxed to ``inp_dim`` x ``inp_dim``, its channel axis
	    is reversed, the layout is changed from ``[row, column, channel]`` to
	    ``[channel, row, column]`` and the values are divided by 255.

	    Returns:
	        A float tensor of shape ``(1, 3, inp_dim, inp_dim)`` (a plain
	        ``torch.Tensor``, not an autograd ``Variable``).
	    """
	    img = letterbox_image(img, (inp_dim, inp_dim))
	    # Reverse the channel order (presumably BGR -> RGB for cv2 images --
	    # confirm against the caller) and move channels first. The copy gives
	    # torch.from_numpy a contiguous array without negative strides.
	    img = img[:, :, ::-1].transpose((2, 0, 1)).copy()
	    img = torch.from_numpy(img).float().div(255.0).unsqueeze(0)
	    return img


	def load_classes(namesfile):
	    """Read class names from ``namesfile``, one name per line.

	    Blank lines inside the file are kept so that list positions keep
	    matching class indices; only the empty entry produced by a trailing
	    newline is dropped.

	    Returns:
	        List of class-name strings in file order.
	    """
	    # The context manager closes the file even if reading fails.
	    with open(namesfile, "r") as fp:
	        names = fp.read().split("\n")

	    # A file ending in "\n" yields one trailing empty string; remove just
	    # that, so a file without a final newline does not lose its last class.
	    if names and names[-1] == "":
	        names.pop()
	    return names