import cv2
import numpy as np
import onnxruntime as rt
import gradio as gr

def image_preprocess(image):
    """Letterbox an image into the square network input tensor.

    The image is passed through ``cv2.COLOR_BGR2RGB``, resized to fit inside
    an ``input_size`` x ``input_size`` square (module-level ``input_size``)
    while keeping its aspect ratio, centred on a canvas filled with 128,
    divided by 255 and rearranged to channels-first ``float32``.

    Args:
        image: Image array whose first two dimensions are height and width.

    Returns:
        A tuple ``(tensor, img_width, img_height, image)`` where ``tensor``
        has shape ``(1, 3, input_size, input_size)``, the two integers are
        the source image dimensions, and ``image`` is the unmodified input.
    """
    src_h, src_w = image.shape[:2]
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Largest uniform scale at which the whole image still fits the square.
    side = input_size
    ratio = min(side / src_w, side / src_h)
    fit_w = int(ratio * src_w)
    fit_h = int(ratio * src_h)
    shrunk = cv2.resize(rgb, (fit_w, fit_h))

    # Centre the resized image on a grey canvas; the borders stay at 128.
    canvas = np.full(shape=[side, side, 3], fill_value=128.0)
    left = (side - fit_w) // 2
    top = (side - fit_h) // 2
    canvas[top:top + fit_h, left:left + fit_w, :] = shrunk

    # Normalise, add the batch axis, then move channels in front of H and W.
    tensor = (canvas / 255.)[np.newaxis, ...].astype(np.float32)
    tensor = np.moveaxis(tensor, -1, 1)

    return tensor, src_w, src_h, image
  
  
# ONNX Runtime sessions keyed by model path, so each model is loaded only once
# per process instead of on every request.
_SESSION_CACHE = {}


def _get_session(model_name):
    """Return the cached inference session for ``model_name``, creating it on first use."""
    session = _SESSION_CACHE.get(model_name)
    if session is None:
        # Two concurrent first calls may both build a session; the later one
        # simply overwrites the earlier, which is harmless.
        session = rt.InferenceSession(model_name)
        _SESSION_CACHE[model_name] = session
    return session


def inference(model_name, image_data):
    """Run an ONNX model on a preprocessed input and return all of its outputs.

    Args:
        model_name: Path of the ``.onnx`` file handed to
            ``rt.InferenceSession``.
        image_data: Array fed to the model's first input.

    Returns:
        The result of ``InferenceSession.run`` for every output the model
        declares, in the order reported by ``get_outputs()``.
    """
    session = _get_session(model_name)
    output_names = [output.name for output in session.get_outputs()]
    input_name = session.get_inputs()[0].name
    return session.run(output_names, {input_name: image_data})
  
def draw_detections(img, box, score, class_id):
    """Draw one bounding box and its caption onto ``img`` in place.

    Args:
        img: Image passed to the OpenCV drawing calls; it is modified.
        box: Four values ``(x1, y1, x2, y2)`` giving two opposite corners of
            the rectangle; the caption is anchored at ``(x1, y1)``.
        score: Value shown in the caption with two decimals.
        class_id: Index into the module-level ``classes`` and
            ``color_palette_pred``.
    """
    left, top, right, bottom = box
    font = cv2.FONT_HERSHEY_SIMPLEX
    box_color = color_palette_pred[class_id]

    # Outline of the detection, 3 px thick.
    cv2.rectangle(img, (int(left), int(top)), (int(right), int(bottom)), box_color, 3)

    caption = f'{classes[class_id]}: {score:.2f}'
    (text_w, text_h), _ = cv2.getTextSize(caption, font, 0.5, 1)

    # Anchor the caption 10 px above the box corner when there is room for
    # the text, otherwise 10 px below it.
    if top - 10 > text_h:
        anchor_y = top - 10
    else:
        anchor_y = top + 10
    text_x = round(left)
    text_y = round(anchor_y)

    # Filled background in the class colour, then the caption in black.
    cv2.rectangle(img, (text_x, text_y - text_h), (text_x + text_w, text_y + text_h), box_color, cv2.FILLED)
    cv2.putText(img, caption, (text_x, text_y), font, 0.5, (0, 0, 0), 1, cv2.LINE_AA)
    
    
def postprocess(detections, img_width, img_height, input_size, original_image):
    """Map detected boxes back to source-image pixels and draw them.

    The letterbox scale and padding are recomputed from the source size and
    ``input_size``, then removed from every box coordinate.

    Args:
        detections: Indexable model outputs; entries 1, 2 and 3 each hold a
            batch whose first element is the boxes, scores and class IDs.
        img_width: Width of the source image in pixels.
        img_height: Height of the source image in pixels.
        input_size: Side length of the square network input.
        original_image: Image that ``draw_detections`` annotates in place.

    Returns:
        ``[class_ids, scores, new_boxes]`` where ``new_boxes`` is a list of
        ``[x1, y1, x2, y2]`` lists in source-image coordinates.
    """
    raw_boxes = detections[1][0]
    scores = detections[2][0]
    class_ids = detections[3][0]

    # Same letterbox geometry as used when the input tensor was built.
    ratio = min(input_size / img_width, input_size / img_height)
    pad_x = (input_size - int(ratio * img_width)) // 2
    pad_y = (input_size - int(ratio * img_height)) // 2

    new_boxes = []
    for raw_box, score, class_id in zip(raw_boxes, scores, class_ids):
        x1, y1, x2, y2 = raw_box
        # Undo the padding offset first, then the resize.
        restored = [
            (x1 - pad_x) / ratio,
            (y1 - pad_y) / ratio,
            (x2 - pad_x) / ratio,
            (y2 - pad_y) / ratio,
        ]
        draw_detections(original_image, restored, score, class_id)
        new_boxes.append(restored)

    return [class_ids, scores, new_boxes]
  
  
# Module-level configuration shared by the functions in this file.

# Class names, one per line; a class ID is used as an index into this list.
with open("coco_names.txt", "r", encoding="utf-8") as f:
    # Strip only the line terminator so that a newline at the end of the file
    # does not produce a spurious empty class name.
    classes = [line.rstrip("\n") for line in f]

# One random colour (three values in [0, 255)) per class.
color_palette_pred = np.random.uniform(0, 255, size=(len(classes), 3))

# Dropdown label -> ONNX model file.
model_names = {
    "YOLOv8n": "yolov8n 640 mask_300000_3000_0.3_0.7.onnx",
    "YOLOv8s": "yolov8s 640 mask_300000_3000_0.3_0.7.onnx",
    "YOLOv8m": "yolov8m 640 mask_300000_3000_0.3_0.7.onnx",
    "YOLOv8l": "yolov8l 640 mask_300000_3000_0.3_0.7.onnx",
    "YOLOv8x": "yolov8x 640 mask_300000_3000_0.3_0.7.onnx",
}

# Side length, in pixels, of the square network input.
input_size = 640


def run(img_path, model_ind: str):
    """Gradio callback: detect objects in an image and return it annotated.

    Args:
        img_path: Despite the name, the image itself as an array; it is
            passed unchanged to ``image_preprocess``.
        model_ind: Key into ``model_names`` selecting the model, e.g.
            ``"YOLOv8n"``.

    Returns:
        The input image with the detections drawn on it in place.

    Raises:
        gr.Error: If no image was supplied.
        KeyError: If ``model_ind`` is not a key of ``model_names``.
    """
    # NOTE(review): Gradio's "image" input presumably delivers RGB arrays,
    # while image_preprocess applies a BGR->RGB swap - confirm which channel
    # order the model expects.
    if img_path is None:
        # Submitting without an image would otherwise fail inside
        # preprocessing with an opaque AttributeError.
        raise gr.Error("Please provide an image.")

    image_data, img_width, img_height, original_image = image_preprocess(img_path)
    detections = inference(model_names[model_ind], image_data)
    # postprocess draws on original_image in place; its return value is not
    # needed here.
    postprocess(detections, img_width, img_height, input_size, original_image)
    return original_image
  
# Web UI: an image plus a model selector in, the annotated image out.
model_choices = ["YOLOv8n", "YOLOv8s", "YOLOv8m", "YOLOv8l", "YOLOv8x"]

demo = gr.Interface(
    fn=run,
    inputs=[
        "image",
        gr.Dropdown(
            model_choices,
            label="Model",
            value="YOLOv8n",
            info="The larger the model, the slower and more performant it is.",
        ),
    ],
    outputs=["image"],
    # One ready-made example per model, all using the same picture.
    examples=[["crowd.jpeg", choice] for choice in model_choices],
)

demo.launch()