import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import gradio as gr
from torchvision import models, transforms


class CSRNet(nn.Module):
    def __init__(self, load_weights=False):
        super(CSRNet, self).__init__()
        self.seen = 0
        # VGG-16 front end followed by a dilated back end, as in the CSRNet paper.
        self.frontend_feat = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512]
        self.backend_feat = [512, 512, 512, 256, 128, 64]
        self.frontend = make_layers(self.frontend_feat)
        self.backend = make_layers(self.backend_feat, in_channels=512, dilation=True)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)
        if not load_weights:
            # Initialise all layers, then copy the pretrained VGG-16 weights into the
            # front end (its conv layers mirror the first ten conv layers of VGG-16).
            mod = models.vgg16(pretrained=True)
            self._initialize_weights()
            frontend_params = list(self.frontend.state_dict().values())
            vgg_params = list(mod.features.state_dict().values())
            for i in range(len(frontend_params)):
                frontend_params[i].data[:] = vgg_params[i].data[:]

    def forward(self, x):
        x = self.frontend(x)
        x = self.backend(x)
        x = self.output_layer(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)


def make_layers(cfg, in_channels=3, batch_norm=False, dilation=False):
    d_rate = 2 if dilation else 1
    layers = []
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=d_rate, dilation=d_rate)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)


# Load the CSRNet model from the local checkpoint. load_weights=True skips the
# VGG-16 initialisation, since the trained weights come from "model.pt".
csrmodel = CSRNet(load_weights=True).cpu()
checkpoint = torch.load("model.pt", map_location=torch.device('cpu'))
csrmodel.load_state_dict(checkpoint)
csrmodel.eval()

# Image preprocessing: resize to 256x256 and normalise with ImageNet statistics.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])


# Prediction function: the estimated count is the sum of the predicted density map,
# which is also returned as a colourised image for display.
def predict_count(input_image):
    image = transform(input_image).unsqueeze(0).cpu()
    with torch.no_grad():
        output = csrmodel(image)
    predicted_count = int(output.sum().item())
    density_map = output.cpu().numpy().reshape(output.shape[2], output.shape[3])
    # Normalise to [0, 1] before applying the colormap; guard against an all-zero map.
    max_val = density_map.max()
    if max_val > 0:
        density_map = density_map / max_val
    density_map_color = (plt.cm.jet(density_map)[:, :, :3] * 255).astype(np.uint8)
    return predicted_count, density_map_color


# Gradio interface: one image in, a count label and the density map out.
input_interface = gr.Image(label="Input Image")
output_interface = [
    gr.Label(label="Predicted Count"),
    gr.Image(label="Density Map", type="numpy"),
]

# Create the Gradio app with both outputs
grapp = gr.Interface(fn=predict_count, inputs=input_interface, outputs=output_interface)

# Launch the app
grapp.launch()
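
# Optional sanity check without the Gradio UI, a minimal sketch assuming a local
# image file (the name "crowd.jpg" is hypothetical):
#
#   from PIL import Image
#   img = np.array(Image.open("crowd.jpg").convert("RGB"))
#   count, dmap = predict_count(img)
#   print("Estimated count:", count)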