### Relevant imports & setup

```python
!pip install geopy > delete.txt
!pip install datasets > delete.txt
!pip install torch torchvision datasets > delete.txt
!pip install huggingface_hub > delete.txt
!rm delete.txt
```

```python
!pip install transformers
import transformers
```

```python
!huggingface-cli login --token [your_token]
```

```python
# Normalization statistics for the GPS targets. The evaluation cell further
# down passes them to the dataset as the training mean and std and uses them
# again to convert predictions back to degrees.
lat_mean = 39.95156937654321
lat_std = 0.0005992518588323268
lon_mean = -75.19136795987654
lon_std = 0.0007030395253318959
```

### Instructions

Our current best performing model is an ensemble of multiple models. To run it on hidden test data, first run the model definitions.

#### Load and define models

```python
from transformers import AutoModelForImageClassification, PretrainedConfig, PreTrainedModel
import torch
import torch.nn as nn
import os
from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
from safetensors.torch import load_file


class CustomConvNeXtConfig(PretrainedConfig):
    """Configuration for `CustomConvNeXtModel`; records the output dimension."""

    model_type = "custom-convnext"

    def __init__(self, num_labels=2, **kwargs):
        super().__init__(**kwargs)
        self.num_labels = num_labels  # Register number of labels (output dimensions)


class CustomConvNeXtModel(PreTrainedModel):
    """ConvNeXt image model whose classifier is replaced by a new linear head."""

    config_class = CustomConvNeXtConfig

    def __init__(self, config, model_name="facebook/convnext-tiny-224",
                 num_classes=2, train_final_layer_only=False):
        """Build the model.

        Args:
            config: Configuration object handed to `PreTrainedModel`.
            model_name: Hub id of the pre-trained ConvNeXt checkpoint to start from.
            num_classes: Output width of the replacement classifier.
            train_final_layer_only: If True, freeze every parameter whose name
                does not contain "classifier".
        """
        super().__init__(config)

        # Load pre-trained ConvNeXt model from Hugging Face
        self.convnext = AutoModelForImageClassification.from_pretrained(model_name)

        # Swap the existing classifier for one with the requested output width
        in_features = self.convnext.classifier.in_features
        self.convnext.classifier = nn.Linear(in_features, num_classes)

        # Freeze previous weights if only training the final layer
        if train_final_layer_only:
            for name, param in self.convnext.named_parameters():
                if "classifier" not in name:
                    param.requires_grad = False
                else:
                    print(f"Unfrozen layer: {name}")

    def forward(self, x):
        """Return the wrapped ConvNeXt model's output for `x`."""
        return self.convnext(x)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="facebook/convnext-tiny-224", **kwargs):
        """Load model weights and configuration from Hugging Face Hub.

        Args:
            repo_id: Hub repository holding `model.safetensors` and `config.json`.
            model_name: Base checkpoint used to build the architecture.
            **kwargs: Accepted for call compatibility; not used here.

        Returns:
            A model instance with the repository's weights loaded.
        """
        # Download model.safetensors and config.json from Hugging Face Hub
        model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

        # Load configuration
        config = CustomConvNeXtConfig.from_pretrained(config_path)

        # Create the model
        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)

        # Load state_dict from safetensors file
        state_dict = load_file(model_path)
        model.load_state_dict(state_dict)

        return model


class CustomResNetConfig(PretrainedConfig):
    """Configuration for `CustomResNetModel`; records the output dimension."""

    model_type = "custom-resnet"

    def __init__(self, num_labels=2, **kwargs):
        super().__init__(**kwargs)
        self.num_labels = num_labels  # Register number of labels (output dimensions)


class CustomResNetModel(nn.Module, PyTorchModelHubMixin):
    """ResNet image model whose classifier is replaced by Flatten + Linear."""

    config_class = CustomResNetConfig

    def __init__(self, model_name="microsoft/resnet-18", num_classes=2,
                 train_final_layer_only=False):
        """Build the model.

        Args:
            model_name: Hub id of the pre-trained ResNet checkpoint to start from.
            num_classes: Output width of the replacement classifier.
            train_final_layer_only: If True, freeze every parameter whose name
                does not contain "classifier".
        """
        super().__init__()

        # Load pre-trained ResNet model from Hugging Face
        self.resnet = AutoModelForImageClassification.from_pretrained(model_name)

        # The existing classifier is a Sequential; its Linear layer sits at index 1
        in_features = self.resnet.classifier[1].in_features

        # Modify the classifier layer to have the desired number of output classes
        self.resnet.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, num_classes)
        )

        self.config = CustomResNetConfig(num_labels=num_classes)

        # Freeze previous weights
        if train_final_layer_only:
            for name, param in self.resnet.named_parameters():
                if "classifier" not in name:
                    param.requires_grad = False
                else:
                    print(f"Unfrozen layer: {name}")

    def forward(self, x):
        """Return the wrapped ResNet model's output for `x`."""
        return self.resnet(x)

    def save_pretrained(self, save_directory, **kwargs):
        """Save model weights and custom configuration in Hugging Face format.

        Writes `pytorch_model.bin` (the state dict) and the configuration into
        `save_directory`, creating the directory if needed. `**kwargs` is
        accepted for call compatibility and not used.
        """
        os.makedirs(save_directory, exist_ok=True)

        # Save model weights
        torch.save(self.state_dict(), os.path.join(save_directory, "pytorch_model.bin"))

        # Save configuration
        self.config.save_pretrained(save_directory)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="microsoft/resnet-18", **kwargs):
        """Load model weights and configuration from Hugging Face Hub.

        Args:
            repo_id: Hub repository holding `pytorch_model.bin` and `config.json`.
            model_name: Base checkpoint used to build the architecture.
            **kwargs: Accepted for call compatibility; not used here.

        Returns:
            A model instance with the repository's weights loaded (mapped to CPU).
        """
        # Download pytorch_model.bin and config.json from Hugging Face Hub
        model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

        # Load configuration
        config = CustomResNetConfig.from_pretrained(config_path)

        # Create the model
        model = cls(model_name=model_name, num_classes=config.num_labels)

        # pytorch_model.bin is a pickle file fetched from the network.
        # weights_only=True restricts unpickling to tensors and plain
        # containers, so a tampered file cannot execute arbitrary code. A state
        # dict written by save_pretrained() above loads fine in this mode.
        state_dict = torch.load(
            model_path,
            map_location=torch.device("cpu"),
            weights_only=True,
        )
        model.load_state_dict(state_dict)

        return model


class CustomEfficientNetConfig(PretrainedConfig):
    """Configuration for `CustomEfficientNetModel`; records the output dimension."""

    model_type = "custom-efficientnet"

    def __init__(self, num_labels=2, **kwargs):
        super().__init__(**kwargs)
        self.num_labels = num_labels  # Register number of labels (output dimensions)


class CustomEfficientNetModel(PreTrainedModel):
    """EfficientNet image model whose classifier is replaced by a new linear head."""

    config_class = CustomEfficientNetConfig

    def __init__(self, config, model_name="google/efficientnet-b0",
                 num_classes=2, train_final_layer_only=False):
        """Build the model.

        Args:
            config: Configuration object handed to `PreTrainedModel`.
            model_name: Hub id of the pre-trained EfficientNet checkpoint to start from.
            num_classes: Output width of the replacement classifier.
            train_final_layer_only: If True, freeze every parameter whose name
                does not contain "classifier".
        """
        super().__init__(config)

        # Load pre-trained EfficientNet model from Hugging Face
        self.efficientnet = AutoModelForImageClassification.from_pretrained(model_name)

        # Access the input features of the existing classifier
        in_features = self.efficientnet.classifier.in_features

        # The Sequential wrapper is kept: it determines the parameter names
        # ("classifier.0.*") that the saved state dict is loaded against.
        self.efficientnet.classifier = nn.Sequential(
            nn.Linear(in_features, num_classes)
        )

        # Freeze previous weights if only training the final layer
        if train_final_layer_only:
            for name, param in self.efficientnet.named_parameters():
                if "classifier" not in name:
                    param.requires_grad = False
                else:
                    print(f"Unfrozen layer: {name}")

    def forward(self, x):
        """Return the wrapped EfficientNet model's output for `x`."""
        return self.efficientnet(x)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="google/efficientnet-b0", **kwargs):
        """Load model weights and configuration from Hugging Face Hub.

        Args:
            repo_id: Hub repository holding `model.safetensors` and `config.json`.
            model_name: Base checkpoint used to build the architecture.
            **kwargs: Accepted for call compatibility; not used here.

        Returns:
            A model instance with the repository's weights loaded.

        Raises:
            ValueError: If `model.safetensors` cannot be downloaded or read.
        """
        # Attempt to download the safetensors model file
        try:
            model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
            state_dict = load_file(model_path)
        except Exception as e:
            raise ValueError(
                f"Failed to download or load 'model.safetensors' from {repo_id}. Ensure the file exists."
            ) from e

        # Download config.json from Hugging Face Hub
        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

        # Load configuration
        config = CustomEfficientNetConfig.from_pretrained(config_path)

        # Create the model
        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)

        # Load the state_dict into the model
        model.load_state_dict(state_dict)

        return model


class CustomViTConfig(PretrainedConfig):
    """Configuration for `CustomViTModel`; records the output dimension."""

    model_type = "custom-vit"

    def __init__(self, num_labels=2, **kwargs):
        super().__init__(**kwargs)
        self.num_labels = num_labels  # Register number of labels (output dimensions)


class CustomViTModel(PreTrainedModel):
    """ViT image model whose classifier is replaced by a new linear head."""

    config_class = CustomViTConfig

    def __init__(self, config, model_name="google/vit-base-patch16-224",
                 num_classes=2, train_final_layer_only=False):
        """Build the model.

        Args:
            config: Configuration object handed to `PreTrainedModel`.
            model_name: Hub id of the pre-trained ViT checkpoint to start from.
            num_classes: Output width of the replacement classifier.
            train_final_layer_only: If True, freeze every parameter whose name
                does not contain "classifier".
        """
        super().__init__(config)

        # Load pre-trained ViT model from Hugging Face
        self.vit = AutoModelForImageClassification.from_pretrained(model_name)

        # Swap the existing classifier for one with the requested output width
        in_features = self.vit.classifier.in_features
        self.vit.classifier = nn.Linear(in_features, num_classes)

        # Freeze previous weights if only training the final layer
        if train_final_layer_only:
            for name, param in self.vit.named_parameters():
                if "classifier" not in name:
                    param.requires_grad = False
                else:
                    print(f"Unfrozen layer: {name}")

    def forward(self, x):
        """Return the wrapped ViT model's output for `x`."""
        return self.vit(x)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="google/vit-base-patch16-224", **kwargs):
        """Load model weights and configuration from Hugging Face Hub.

        Args:
            repo_id: Hub repository holding `model.safetensors` and `config.json`.
            model_name: Base checkpoint used to build the architecture.
            **kwargs: Accepted for call compatibility; not used here.

        Returns:
            A model instance with the repository's weights loaded.

        Raises:
            ValueError: If `model.safetensors` cannot be downloaded or read.
        """
        # Attempt to download the safetensors model file
        try:
            model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
            state_dict = load_file(model_path)
        except Exception as e:
            raise ValueError(
                f"Failed to download or load 'model.safetensors' from {repo_id}. Ensure the file exists."
            ) from e

        # Download config.json from Hugging Face Hub
        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

        # Load configuration
        config = CustomViTConfig.from_pretrained(config_path)

        # Create the model
        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)

        # Load the state_dict into the model
        model.load_state_dict(state_dict)

        return model


# Define the WeightedEnsembleModel class
class WeightedEnsembleModel(nn.Module):
    """Combine several models by a weighted sum of their outputs."""

    def __init__(self, models, weights):
        """Initialize the ensemble with individual models and their weights.

        Args:
            models: Iterable of `nn.Module`s that all accept the same input.
            weights: One multiplier per model, in the same order as `models`.
                They are applied as given and are not normalized.

        Raises:
            ValueError: If `models` is empty or its length differs from `weights`.
        """
        super().__init__()
        self.models = nn.ModuleList(models)  # Wrap models in ModuleList
        self.weights = weights

        if len(self.models) == 0:
            raise ValueError("WeightedEnsembleModel requires at least one model.")
        # zip() in forward() would silently drop the unmatched tail, so a
        # length mismatch is rejected up front instead.
        if len(self.models) != len(weights):
            raise ValueError(
                f"Got {len(self.models)} models but {len(weights)} weights; "
                "they must have the same length."
            )

    def forward(self, images):
        """Return the weighted sum of the member models' outputs for `images`.

        Each member may return either a tensor or an object with a `.logits`
        attribute; in the latter case the `.logits` tensor is used.
        """
        ensemble_logits = None

        for model, weight in zip(self.models, self.weights):
            outputs = model(images)
            logits = outputs.logits if hasattr(outputs, "logits") else outputs  # Extract logits

            if ensemble_logits is None:
                # Default-dtype accumulator on the input's device, sized from
                # the first member's output rather than a hard-coded width of 2.
                ensemble_logits = torch.zeros(logits.shape).to(images.device)

            ensemble_logits += weight * logits  # Weighted sum of logits

        return ensemble_logits
```

Now, load the model weights from Hugging Face.
```python
from transformers import AutoModelForImageClassification
import torch
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
```

```python
#resnet
resnet = CustomResNetModel.from_pretrained(
    "final-project-5190/model-resnet-50-base",
    model_name="microsoft/resnet-50"
)

#convnext
convnext = CustomConvNeXtModel.from_pretrained(
    "final-project-5190/model-convnext-tiny-reducePlateau",
    model_name="facebook/convnext-tiny-224"
)

#vit
vit = CustomViTModel.from_pretrained(
    "final-project-5190/model-ViT-base",
    model_name="google/vit-base-patch16-224"
)

#efficientnet
efficientnet = CustomEfficientNetModel.from_pretrained(
    "final-project-5190/model-efficientnet-b0-base",
    model_name="google/efficientnet-b0"
)

# Ensemble members and their weights; the two lists are matched by position.
models = [convnext, resnet, vit, efficientnet]
# NOTE(review): these weights sum to 1.01 and WeightedEnsembleModel applies
# them as given (no normalization), so the ensemble output is a weighted sum
# rather than a strict average. Left unchanged -- presumably the results
# reported at the end of this document were produced with these values.
weights = [0.28, 0.26, 0.20, 0.27]
```

#### For data loading

```python
# Download
from datasets import load_dataset, Image
```

```python
import torch
import torch.nn as nn
# Aliased as `tv_models`, not `models`: importing it as `models` would rebind
# the list of ensemble members defined above, and the evaluation cell would
# then hand the torchvision module to WeightedEnsembleModel instead of the list.
import torchvision.models as tv_models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from transformers import AutoImageProcessor, AutoModelForImageClassification, AutoConfig
from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
from PIL import Image
import os
import numpy as np


class GPSImageDataset(Dataset):
    """Dataset yielding `(image, normalized [latitude, longitude])` pairs.

    Wraps a Hugging Face dataset whose examples provide the keys `image`,
    `Latitude` and `Longitude`.
    """

    def __init__(self, hf_dataset, transform=None, lat_mean=None, lat_std=None,
                 lon_mean=None, lon_std=None):
        """Store the dataset and the normalization statistics.

        Args:
            hf_dataset: Source dataset, indexable by position and by column name.
            transform: Optional callable applied to each image.
            lat_mean, lat_std, lon_mean, lon_std: Statistics used to normalize
                the coordinates. Any value left as None is computed from the
                corresponding column of `hf_dataset`.
        """
        self.hf_dataset = hf_dataset
        self.transform = transform

        # Compute mean and std from the dataset if not provided
        self.latitude_mean = lat_mean if lat_mean is not None else np.mean(np.array(self.hf_dataset['Latitude']))
        self.latitude_std = lat_std if lat_std is not None else np.std(np.array(self.hf_dataset['Latitude']))
        self.longitude_mean = lon_mean if lon_mean is not None else np.mean(np.array(self.hf_dataset['Longitude']))
        self.longitude_std = lon_std if lon_std is not None else np.std(np.array(self.hf_dataset['Longitude']))

    def __len__(self):
        """Return the number of examples in the wrapped dataset."""
        return len(self.hf_dataset)

    def __getitem__(self, idx):
        """Return the transformed image and normalized coordinates at `idx`.

        Returns:
            Tuple `(image, gps_coords)` where `gps_coords` is a float32 tensor
            `[latitude, longitude]`, each standardized as `(value - mean) / std`.
        """
        # Extract data
        example = self.hf_dataset[idx]

        # Load and process the image
        image = example['image']
        latitude = example['Latitude']
        longitude = example['Longitude']
        # image = image.rotate(-90, expand=True)
        if self.transform:
            image = self.transform(image)

        # Normalize GPS coordinates
        latitude = (latitude - self.latitude_mean) / self.latitude_std
        longitude = (longitude - self.longitude_mean) / self.longitude_std
        gps_coords = torch.tensor([latitude, longitude], dtype=torch.float32)

        return image, gps_coords
```

```python
# Dataloader + Visualize
# Augmenting transform with random crop, flip and color jitter.
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),  # Random crop and resize to 224x224
    transforms.RandomHorizontalFlip(),  # Random horizontal flip
    # transforms.RandomRotation(degrees=15),  # Random rotation between -15 and 15 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Random color jitter
    # transforms.GaussianBlur(kernel_size=(3, 5), sigma=(0.1, 2.0)),
    # transforms.RandomPerspective(distortion_scale=0.5, p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Separate transform for inference, without augmentations
inference_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
```

Here's an example of us testing the ensemble on the release test set. To evaluate on different data, change the line below that loads `release_data` and run the rest of the code to obtain the RMSE.
```python
# Load test data
release_data = load_dataset("gydou/released_img", split="train")
```

```python
# Create dataset and dataloader using training mean and std
rel_dataset = GPSImageDataset(
    hf_dataset=release_data,
    transform=inference_transform,
    lat_mean=lat_mean,
    lat_std=lat_std,
    lon_mean=lon_mean,
    lon_std=lon_std
)
rel_dataloader = DataLoader(rel_dataset, batch_size=32, shuffle=False)
```

```python
def _rmse(y_true, y_pred):
    """Return the RMSE averaged over output columns.

    This reproduces `mean_squared_error(..., squared=False)`: the square root
    is taken per output column and the columns are then averaged. The
    `squared` argument was deprecated in scikit-learn 1.4 and removed in 1.6,
    so this form is used because it works on both old and new versions.
    """
    per_output_mse = mean_squared_error(y_true, y_pred, multioutput="raw_values")
    return float(np.mean(np.sqrt(per_output_mse)))


# ensemble
ensemble_model = WeightedEnsembleModel(models=models, weights=weights).to(device)

# Denormalization constants, built once instead of on every batch:
# degrees = normalized * std + mean
coord_std = torch.tensor([lat_std, lon_std])
coord_mean = torch.tensor([lat_mean, lon_mean])

# Validation
all_preds = []
all_actuals = []

ensemble_model.eval()
with torch.no_grad():
    for images, gps_coords in rel_dataloader:
        # Only the images are needed on the device; the targets are combined
        # with CPU tensors below, so they stay where the loader put them.
        images = images.to(device)

        # Weighted ensemble prediction
        ensemble_logits = ensemble_model(images)

        # Denormalize predictions and actual values
        preds = ensemble_logits.cpu() * coord_std + coord_mean
        actuals = gps_coords.cpu() * coord_std + coord_mean

        all_preds.append(preds)
        all_actuals.append(actuals)

# Concatenate all batches
all_preds = torch.cat(all_preds).numpy()
all_actuals = torch.cat(all_actuals).numpy()

# Compute error metrics (in degrees)
mae = mean_absolute_error(all_actuals, all_preds)
rmse = _rmse(all_actuals, all_preds)

print(f'Mean Absolute Error: {mae}')
print(f'Root Mean Squared Error: {rmse}')

# Convert predictions and actuals to meters
latitude_mean_radians = np.radians(lat_mean)  # Convert to radians for cosine
meters_per_degree_latitude = 111000  # Constant
meters_per_degree_longitude = 111000 * np.cos(latitude_mean_radians)  # Adjusted for latitude mean

all_preds_meters = all_preds.copy()
all_preds_meters[:, 0] *= meters_per_degree_latitude  # Latitude to meters
all_preds_meters[:, 1] *= meters_per_degree_longitude  # Longitude to meters

all_actuals_meters = all_actuals.copy()
all_actuals_meters[:, 0] *= meters_per_degree_latitude  # Latitude to meters
all_actuals_meters[:, 1] *= meters_per_degree_longitude  # Longitude to meters

# Compute error metrics in meters
mae_meters = mean_absolute_error(all_actuals_meters, all_preds_meters)
rmse_meters = _rmse(all_actuals_meters, all_preds_meters)

print(f"Mean Absolute Error (meters): {mae_meters:.2f}")
print(f"Root Mean Squared Error (meters): {rmse_meters:.2f}")
```

After running inference on the release test set, our results are the following.

- Release Dataset Mean Absolute Error: 0.0004267849560326909
- Release Dataset Root Mean Squared Error: 0.0005247778631268114
- Mean Absolute Error (meters): 41.90
- Root Mean Squared Error (meters): 51.29