"""Gradio demo that runs YOLOv8 ONNX detectors on an uploaded image."""
import functools

import cv2
import gradio as gr
import numpy as np
import onnxruntime as rt

# Side length, in pixels, of the square tensor fed to the models.
input_size = 640

# Dropdown label -> ONNX model file.
model_names = {
    "YOLOv8n": "yolov8n 640 mask_300000_3000_0.3_0.7.onnx",
    "YOLOv8s": "yolov8s 640 mask_300000_3000_0.3_0.7.onnx",
    "YOLOv8m": "yolov8m 640 mask_300000_3000_0.3_0.7.onnx",
    "YOLOv8l": "yolov8l 640 mask_300000_3000_0.3_0.7.onnx",
    "YOLOv8x": "yolov8x 640 mask_300000_3000_0.3_0.7.onnx",
}

# One class name per line; the line index is used as the class id.
# Trailing newlines are stripped so the file's final line break does not
# produce a phantom empty class name.
with open("coco_names.txt", "r", encoding="utf-8") as f:
    classes = f.read().rstrip("\n").split("\n")

# One random colour per class, used for boxes and label backgrounds.
color_palette_pred = np.random.uniform(0, 255, size=(len(classes), 3))


def _letterbox_geometry(img_width, img_height, target_size):
    """Return ``(scale, new_w, new_h, pad_x, pad_y)`` for a letterbox fit.

    ``scale`` is the factor that fits an ``img_width`` x ``img_height`` image
    inside a ``target_size`` square while keeping its aspect ratio,
    ``new_w``/``new_h`` is the scaled size (truncated to int) and
    ``pad_x``/``pad_y`` is the left/top offset that centres the scaled image
    in the square.
    """
    scale = min(target_size / img_width, target_size / img_height)
    new_w, new_h = int(scale * img_width), int(scale * img_height)
    pad_x, pad_y = (target_size - new_w) // 2, (target_size - new_h) // 2
    return scale, new_w, new_h, pad_x, pad_y


def image_preprocess(image):
    """Letterbox ``image`` into the square float tensor fed to the model.

    The image is channel-swapped, resized with its aspect ratio preserved,
    centred on an ``input_size`` x ``input_size`` canvas filled with 128,
    scaled by 1/255 and rearranged to shape ``(1, 3, input_size, input_size)``
    as ``float32``.

    Returns:
        ``(tensor, img_width, img_height, image)`` where the last three items
        describe / are the untouched input image.
    """
    img_height, img_width = image.shape[0:2]

    # NOTE(review): Gradio image inputs are typically RGB, in which case this
    # swap hands BGR data to the model - confirm which channel order the
    # exported models expect before changing it.
    image_converted = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    _, new_w, new_h, pad_x, pad_y = _letterbox_geometry(
        img_width, img_height, input_size
    )
    image_resized = cv2.resize(image_converted, (new_w, new_h))

    image_padded = np.full(shape=[input_size, input_size, 3], fill_value=128.0)
    image_padded[pad_y:pad_y + new_h, pad_x:pad_x + new_w, :] = image_resized
    image_padded = image_padded / 255.

    # Add the batch axis, then move channels from last to second position.
    image_padded = image_padded[np.newaxis, ...].astype(np.float32)
    image_padded = np.moveaxis(image_padded, -1, 1)
    return image_padded, img_width, img_height, image


@functools.lru_cache(maxsize=8)
def _load_session(model_path):
    """Create (once per path) and return the ONNX Runtime session."""
    return rt.InferenceSession(model_path)


def inference(model_name, image_data):
    """Run the ONNX model stored at ``model_name`` on ``image_data``.

    Sessions are cached per model path so repeated requests do not reload the
    model file from disk.

    Returns:
        The list of all model outputs, in the order the session declares them.
    """
    sess = _load_session(model_name)
    output_names = [output.name for output in sess.get_outputs()]
    input_name = sess.get_inputs()[0].name
    return sess.run(output_names, {input_name: image_data})


def draw_detections(img, box, score, class_id):
    """Draw one labelled bounding box onto ``img`` in place.

    Args:
        img: Image array to draw on (modified in place).
        box: ``(x1, y1, x2, y2)`` corner coordinates in ``img`` pixels.
        score: Confidence value shown in the label with two decimals.
        class_id: Index into ``classes`` / ``color_palette_pred``.
    """
    x1, y1, x2, y2 = box

    # Model outputs may carry the id as a float; sequences need an int index.
    class_id = int(class_id)
    color = color_palette_pred[class_id]

    cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), color, 3)

    label = f'{classes[class_id]}: {score:.2f}'
    (label_width, label_height), _ = cv2.getTextSize(
        label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1
    )

    # Put the label above the box unless that would leave the image top.
    label_x = round(x1)
    label_y = round(y1 - 10 if y1 - 10 > label_height else y1 + 10)

    # Filled rectangle as the label background, then the text itself.
    cv2.rectangle(
        img,
        (label_x, label_y - label_height),
        (label_x + label_width, label_y + label_height),
        color,
        cv2.FILLED,
    )
    cv2.putText(
        img, label, (label_x, label_y),
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA,
    )


def postprocess(detections, img_width, img_height, input_size, original_image):
    """Map detections back to the original image and draw them on it.

    Reads boxes, scores and class ids from ``detections[1][0]``,
    ``detections[2][0]`` and ``detections[3][0]``, undoes the letterbox
    padding and scaling applied by ``image_preprocess`` and draws every
    detection onto ``original_image`` in place.

    Returns:
        ``[class_ids, scores, new_boxes]`` where ``new_boxes`` holds the
        ``[x1, y1, x2, y2]`` boxes in original-image coordinates.
    """
    boxes = detections[1][0]
    scores = detections[2][0]
    class_ids = detections[3][0]

    scale, _, _, pad_x, pad_y = _letterbox_geometry(
        img_width, img_height, input_size
    )

    new_boxes = []
    for box, score, class_id in zip(boxes, scores, class_ids):
        x1, y1, x2, y2 = box
        box = [
            (x1 - pad_x) / scale,
            (y1 - pad_y) / scale,
            (x2 - pad_x) / scale,
            (y2 - pad_y) / scale,
        ]
        draw_detections(original_image, box, score, class_id)
        new_boxes.append(box)
    return [class_ids, scores, new_boxes]


def run(img_path, model_ind: str):
    """Gradio callback: detect objects and return the annotated image.

    Args:
        img_path: The image array supplied by the Gradio image input
            (despite the name, it is passed to ``image_preprocess`` as an
            array, not a path).
        model_ind: Key of ``model_names`` selecting the model to run.

    Raises:
        gr.Error: If no image was supplied.
    """
    if img_path is None:
        raise gr.Error("Please provide an image before running detection.")

    image_data, img_width, img_height, original_image = image_preprocess(img_path)
    detections = inference(model_names[model_ind], image_data)
    postprocess(detections, img_width, img_height, input_size, original_image)
    return original_image


demo = gr.Interface(
    fn=run,
    inputs=[
        "image",
        gr.Dropdown(
            list(model_names),
            label="Model",
            value="YOLOv8n",
            info="The larger the model, the slower and more performant it is.",
        ),
    ],
    outputs=["image"],
    examples=[["crowd.jpeg", name] for name in model_names],
)

if __name__ == "__main__":
    demo.launch()