# Image to GPS Project - ConvNeXt, MobileNet and EfficientNet Ensemble

## Training Data Statistics

```python
lat_mean = 39.951537011424264
lat_std = 0.0006940325318781937
lon_mean = -75.19152009539549
lon_std = 0.0007607716964655242
```

## How to Load the Model and Perform Inference

```bash
# install dependencies
pip install geopy datasets torch torchvision huggingface_hub
```

```python
# import packages
import numpy as np
from geopy.distance import geodesic
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.models import (
    mobilenet_v2, MobileNet_V2_Weights,
    convnext_tiny, ConvNeXt_Tiny_Weights,
    efficientnet_b0, EfficientNet_B0_Weights,
)
from datasets import load_dataset
from huggingface_hub import hf_hub_download

# download the trained ensemble checkpoint
repo_id = "cis519projectA/Ensemble_ConvNeXt_MobileNet_EfficientNet_Weight_Adjustment"
filename = "custom_ensemble_weight_adjust.pth"
model_path = hf_hub_download(repo_id=repo_id, filename=filename)

# define models
class CustomEfficientNetModel(nn.Module):
    def __init__(self, weights=EfficientNet_B0_Weights.DEFAULT, num_classes=2):
        super().__init__()
        self.efficientnet = efficientnet_b0(weights=weights)
        in_features = self.efficientnet.classifier[1].in_features
        # replace the classifier with a regression head predicting (lat, lon)
        self.efficientnet.classifier = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(512, num_classes)
        )
        # freeze the earliest feature blocks
        for param in self.efficientnet.features[:3].parameters():
            param.requires_grad = False

    def forward(self, x):
        return self.efficientnet(x)


class CustomConvNeXtModel(nn.Module):
    def __init__(self, weights=ConvNeXt_Tiny_Weights.DEFAULT, num_classes=2):
        super().__init__()
        self.convnext = convnext_tiny(weights=weights)
        in_features = self.convnext.classifier[2].in_features
        self.convnext.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(in_features, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(512, num_classes)
        )
        for param in self.convnext.features[:4].parameters():
            param.requires_grad = False

    def forward(self, x):
        return self.convnext(x)


class CustomMobileNetModel(nn.Module):
    def __init__(self, weights=MobileNet_V2_Weights.DEFAULT, num_classes=2):
        super().__init__()
        self.mobilenet = mobilenet_v2(weights=weights)
        in_features = self.mobilenet.classifier[1].in_features
        self.mobilenet.classifier = nn.Sequential(
            nn.Linear(in_features, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(512, num_classes)
        )
        for param in self.mobilenet.features[:5].parameters():
            param.requires_grad = False

    def forward(self, x):
        return self.mobilenet(x)


class EnsembleModel(nn.Module):
    def __init__(self, convnext_model, mobilenet_model, efficientnet_model, num_classes=2):
        super().__init__()
        self.convnext = convnext_model
        self.mobilenet = mobilenet_model
        self.efficientnet = efficientnet_model
        # learnable per-model weights, softmax-normalized in forward()
        self.weight_convnext = nn.Parameter(torch.tensor(1.0))
        self.weight_mobilenet = nn.Parameter(torch.tensor(1.0))
        self.weight_efficientnet = nn.Parameter(torch.tensor(1.0))
        # fusion head defined for the checkpoint but not used in forward()
        self.fc = nn.Sequential(
            nn.Linear(num_classes * 3, 512),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        convnext_out = self.convnext(x)
        mobilenet_out = self.mobilenet(x)
        efficientnet_out = self.efficientnet(x)
        weights = torch.softmax(
            torch.stack([self.weight_convnext, self.weight_mobilenet, self.weight_efficientnet]),
            dim=0
        )
        combined = (weights[0] * convnext_out
                    + weights[1] * mobilenet_out
                    + weights[2] * efficientnet_out)
        return combined
```
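The ensemble fuses the three backbone outputs with learnable weights that are softmax-normalized in `forward()`. As a quick sanity check of that fusion, here is a minimal sketch (assuming only the class definitions above, with no trained checkpoint loaded) that runs a random batch through an untrained ensemble and prints the normalized fusion weights, which start out equal at roughly 1/3 each:

```python
# Sketch: sanity-check the weighted fusion on random input (untrained weights).
sanity_model = EnsembleModel(
    CustomConvNeXtModel(num_classes=2),
    CustomMobileNetModel(num_classes=2),
    CustomEfficientNetModel(num_classes=2),
    num_classes=2,
).eval()

with torch.no_grad():
    dummy = torch.randn(4, 3, 224, 224)   # batch of 4 RGB 224x224 images
    out = sanity_model(dummy)             # shape (4, 2): normalized (lat, lon)
    fusion_weights = torch.softmax(
        torch.stack([sanity_model.weight_convnext,
                     sanity_model.weight_mobilenet,
                     sanity_model.weight_efficientnet]),
        dim=0,
    )

print("output shape:", tuple(out.shape))           # (4, 2)
print("fusion weights:", fusion_weights.tolist())  # ~[0.333, 0.333, 0.333] before training
```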
```python
# build the ensemble and move it to the available device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

convnext_model = CustomConvNeXtModel(weights=ConvNeXt_Tiny_Weights.DEFAULT, num_classes=2)
mobilenet_model = CustomMobileNetModel(weights=MobileNet_V2_Weights.DEFAULT, num_classes=2)
efficientnet_model = CustomEfficientNetModel(weights=EfficientNet_B0_Weights.DEFAULT, num_classes=2)
ensemble_model = EnsembleModel(convnext_model, mobilenet_model, efficientnet_model, num_classes=2).to(device)

# load the model weights
state_dict = torch.load(model_path, map_location=device)
ensemble_model.load_state_dict(state_dict)
ensemble_model.eval()

# load the dataset
dataset_test = load_dataset("gydou/released_img", split="train")

# define the inference transforms
inference_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# training-set statistics used to (de)normalize GPS coordinates
lat_mean = 39.951537011424264
lat_std = 0.0006940325318781937
lon_mean = -75.19152009539549
lon_std = 0.0007607716964655242

class GPSImageDataset(Dataset):
    def __init__(self, hf_dataset, transform=None,
                 lat_mean=None, lat_std=None, lon_mean=None, lon_std=None):
        self.hf_dataset = hf_dataset
        self.transform = transform
        self.latitude_mean = lat_mean
        self.latitude_std = lat_std
        self.longitude_mean = lon_mean
        self.longitude_std = lon_std

    def __len__(self):
        return len(self.hf_dataset)

    def __getitem__(self, idx):
        example = self.hf_dataset[idx]
        image = example['image']
        latitude = example['Latitude']
        longitude = example['Longitude']
        if self.transform:
            image = self.transform(image)
        # normalize the GPS labels with the training-set statistics
        latitude = (latitude - self.latitude_mean) / self.latitude_std
        longitude = (longitude - self.longitude_mean) / self.longitude_std
        gps_coords = torch.tensor([latitude, longitude], dtype=torch.float32)
        return image, gps_coords

# wrap the test data
test_dataset = GPSImageDataset(
    hf_dataset=dataset_test,
    transform=inference_transform,
    lat_mean=lat_mean, lat_std=lat_std,
    lon_mean=lon_mean, lon_std=lon_std
)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

# evaluate
def evaluate_model_single_batch(model, dataloader, lat_mean, lat_std, lon_mean, lon_std):
    all_distances = []
    model.eval()
    with torch.no_grad():
        for batch_idx, (images, gps_coords) in enumerate(dataloader):
            images, gps_coords = images.to(device), gps_coords.to(device)
            outputs = model(images)
            # denormalize predictions and labels back to degrees
            preds_denorm = outputs.cpu().numpy() * np.array([lat_std, lon_std]) + np.array([lat_mean, lon_mean])
            actuals_denorm = gps_coords.cpu().numpy() * np.array([lat_std, lon_std]) + np.array([lat_mean, lon_mean])
            for pred, actual in zip(preds_denorm, actuals_denorm):
                distance = geodesic((actual[0], actual[1]), (pred[0], pred[1])).meters
                all_distances.append(distance)
            break  # only the first batch

    mean_error = np.mean(all_distances)
    rmse_error = np.sqrt(np.mean(np.square(all_distances)))
    return mean_error, rmse_error

# Evaluate using only one batch
mean_error, rmse_error = evaluate_model_single_batch(
    ensemble_model, test_dataloader,
    lat_mean, lat_std, lon_mean, lon_std
)
print(f"Mean Error (meters): {mean_error:.2f}, RMSE (meters): {rmse_error:.2f}")
```
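To geolocate a single image rather than a whole `DataLoader` batch, a minimal sketch along these lines can be used. It reuses `ensemble_model`, `inference_transform`, `device`, and the normalization statistics defined above; the `predict_gps` helper and the `"example.jpg"` path are hypothetical names introduced here for illustration:

```python
from PIL import Image

def predict_gps(image_path):
    """Predict (latitude, longitude) in degrees for a single image file."""
    image = Image.open(image_path).convert("RGB")
    tensor = inference_transform(image).unsqueeze(0).to(device)  # (1, 3, 224, 224)
    with torch.no_grad():
        pred = ensemble_model(tensor).cpu().numpy()[0]           # normalized (lat, lon)
    # denormalize with the training-set statistics
    return pred[0] * lat_std + lat_mean, pred[1] * lon_std + lon_mean

# example usage (hypothetical image path)
# lat, lon = predict_gps("example.jpg")
# print(f"Predicted location: ({lat:.6f}, {lon:.6f})")
```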