final-project-5190
/

model-resnet-50-base

PyTorch

custom-resnet

Model card Files Files and versions Community

lling0212 commited on Dec 14, 2024

Commit

42f6af3

1 Parent(s): 96fbad0

Add README.md

Browse files

Files changed (1) hide show

README.md +513 -0

README.md ADDED Viewed

	@@ -0,0 +1,513 @@

+### Relevant imports & set up
+```python
+!pip install geopy > delete.txt
+!pip install datasets > delete.txt
+!pip install torch torchvision datasets > delete.txt
+!pip install huggingface_hub > delete.txt
+!rm delete.txt
+```
+```python
+!pip install transformers
+import transformers
+```
+```python
+!huggingface-cli login --token [your_token]
+```
+```python
+# Normalization statistics for the GPS targets, in degrees. They are passed to
+# GPSImageDataset and reused to de-normalize predictions in the evaluation cell.
+# NOTE(review): presumably computed on the training split -- confirm.
+lat_mean = 39.95156937654321
+lat_std = 0.0005992518588323268
+lon_mean = -75.19136795987654
+lon_std = 0.0007030395253318959
+```
+### Instructions
+Our current best-performing model is an ensemble of several models. To run it on hidden test data, first run all of the model definitions below.
+#### Load and define models
+1. ConvNeXt
+```python
+from transformers import AutoModelForImageClassification, PretrainedConfig, PreTrainedModel
+import torch
+import torch.nn as nn
+import os
+from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
+class CustomConvNeXtConfig(PretrainedConfig):
+    """Configuration for CustomConvNeXtModel; records the number of output dimensions."""
+    # Identifier string stored with the configuration.
+    model_type = "custom-convnext"
+    def __init__(self, num_labels=2, **kwargs):
+        """Forward extra keyword arguments to PretrainedConfig and store ``num_labels``."""
+        super().__init__(**kwargs)
+        self.num_labels = num_labels  # Register number of labels (output dimensions)
+class CustomConvNeXtModel(PreTrainedModel):
+    config_class = CustomConvNeXtConfig
+    def __init__(self, config, model_name="facebook/convnext-tiny-224",
+                 num_classes=2, train_final_layer_only=False):
+        super().__init__(config)
+        # Load pre-trained ConvNeXt model from Hugging Face
+        self.convnext = AutoModelForImageClassification.from_pretrained(model_name)
+        # Access the input features of the existing classifier
+        in_features = self.convnext.classifier.in_features
+        # Modify the classifier layer to match the number of output classes
+        self.convnext.classifier = nn.Linear(in_features, num_classes)
+        # Freeze previous weights if only training the final layer
+        if train_final_layer_only:
+            for name, param in self.convnext.named_parameters():
+                if "classifier" not in name:
+                    param.requires_grad = False
+                else:
+                    print(f"Unfrozen layer: {name}")
+    def forward(self, x):
+        return self.convnext(x)
+    @classmethod
+    def from_pretrained(cls, repo_id, model_name="facebook/convnext-tiny-224", **kwargs):
+        """Load model weights and configuration from Hugging Face Hub."""
+        # Download model.safetensors from Hugging Face Hub
+        model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
+        # Download config.json from Hugging Face Hub
+        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
+        # Load configuration
+        config = CustomConvNeXtConfig.from_pretrained(config_path)
+        # Create the model
+        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)
+        # Load state_dict from safetensors file
+        from safetensors.torch import load_file  # Safetensors library
+        state_dict = load_file(model_path)
+        model.load_state_dict(state_dict)
+        return model
+```
+2. ResNet
+``` python
+from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
+class CustomResNetConfig(PretrainedConfig):
+    """Configuration for CustomResNetModel; records the number of output dimensions."""
+    # Identifier string stored with the configuration.
+    model_type = "custom-resnet"
+    def __init__(self, num_labels=2, **kwargs):
+        """Forward extra keyword arguments to PretrainedConfig and store ``num_labels``."""
+        super().__init__(**kwargs)
+        self.num_labels = num_labels  # Register number of labels (output dimensions)
+class CustomResNetModel(nn.Module, PyTorchModelHubMixin):
+    config_class = CustomResNetConfig
+    def __init__(self, model_name="microsoft/resnet-18",
+                 num_classes=2,
+                 train_final_layer_only=False):
+        super().__init__()
+        # Load pre-trained ResNet model from Hugging Face
+        self.resnet = AutoModelForImageClassification.from_pretrained(model_name)
+        # Access the Linear layer within the Sequential classifier
+        in_features = self.resnet.classifier[1].in_features  # Accessing the Linear layer within the Sequential
+        # Modify the classifier layer to have the desired number of output classes
+        self.resnet.classifier = nn.Sequential(
+            nn.Flatten(),
+            nn.Linear(in_features, num_classes)
+        )
+        self.config = CustomResNetConfig(num_labels=num_classes)
+        # Freeze previous weights
+        if train_final_layer_only:
+            for name, param in self.resnet.named_parameters():
+                if "classifier" not in name:
+                    param.requires_grad = False
+                else:
+                    print(f"Unfrozen layer: {name}")
+    def forward(self, x):
+        return self.resnet(x)
+    def save_pretrained(self, save_directory, **kwargs):
+        """Save model weights and custom configuration in Hugging Face format."""
+        os.makedirs(save_directory, exist_ok=True)
+        # Save model weights
+        torch.save(self.state_dict(), os.path.join(save_directory, "pytorch_model.bin"))
+        # Save configuration
+        self.config.save_pretrained(save_directory)
+    @classmethod
+    def from_pretrained(cls, repo_id, model_name="microsoft/resnet-18", **kwargs):
+        """Load model weights and configuration from Hugging Face Hub or local directory."""
+        # Download pytorch_model.bin from Hugging Face Hub
+        model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
+        # Download config.json from Hugging Face Hub
+        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
+        # Load configuration
+        config = CustomResNetConfig.from_pretrained(config_path)
+        # Create the model
+        model = cls(model_name=model_name, num_classes=config.num_labels)
+        # Load state_dict
+        model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
+        return model
+```
+3. EfficientNet
+``` python
+from huggingface_hub import hf_hub_download
+from safetensors.torch import load_file  # Make sure to import this
+from transformers import AutoModelForImageClassification, PreTrainedModel, PretrainedConfig
+import torch.nn as nn
+class CustomEfficientNetConfig(PretrainedConfig):
+    """Configuration for CustomEfficientNetModel; records the number of output dimensions."""
+    # Identifier string stored with the configuration.
+    model_type = "custom-efficientnet"
+    def __init__(self, num_labels=2, **kwargs):
+        """Forward extra keyword arguments to PretrainedConfig and store ``num_labels``."""
+        super().__init__(**kwargs)
+        self.num_labels = num_labels  # Register number of labels (output dimensions)
+class CustomEfficientNetModel(PreTrainedModel):
+    config_class = CustomEfficientNetConfig
+    def __init__(self, config, model_name="google/efficientnet-b0",
+                 num_classes=2, train_final_layer_only=False):
+        super().__init__(config)
+        # Load pre-trained EfficientNet model from Hugging Face
+        self.efficientnet = AutoModelForImageClassification.from_pretrained(model_name)
+        # Access the input features of the existing classifier
+        in_features = self.efficientnet.classifier.in_features
+        # Modify the classifier layer to match the number of output classes
+        self.efficientnet.classifier = nn.Sequential(
+            nn.Linear(in_features, num_classes)
+        )
+        # Freeze previous weights if only training the final layer
+        if train_final_layer_only:
+            for name, param in self.efficientnet.named_parameters():
+                if "classifier" not in name:
+                    param.requires_grad = False
+                else:
+                    print(f"Unfrozen layer: {name}")
+    def forward(self, x):
+        return self.efficientnet(x)
+    @classmethod
+    def from_pretrained(cls, repo_id, model_name="google/efficientnet-b0", **kwargs):
+        """Load model weights and configuration from Hugging Face Hub."""
+        # Attempt to download the safetensors model file
+        try:
+            model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
+            state_dict = load_file(model_path)
+        except Exception as e:
+            raise ValueError(
+                f"Failed to download or load 'model.safetensors' from {repo_id}. Ensure the file exists."
+            ) from e
+        # Download config.json from Hugging Face Hub
+        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
+        # Load configuration
+        config = CustomEfficientNetConfig.from_pretrained(config_path)
+        # Create the model
+        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)
+        # Load the state_dict into the model
+        model.load_state_dict(state_dict)
+        return model
+```
+4. ViT
+```python
+from huggingface_hub import hf_hub_download
+from safetensors.torch import load_file
+from transformers import AutoModelForImageClassification, PreTrainedModel, PretrainedConfig
+import torch.nn as nn
+class CustomViTConfig(PretrainedConfig):
+    """Configuration for CustomViTModel; records the number of output dimensions."""
+    # Identifier string stored with the configuration.
+    model_type = "custom-vit"
+    def __init__(self, num_labels=2, **kwargs):
+        """Forward extra keyword arguments to PretrainedConfig and store ``num_labels``."""
+        super().__init__(**kwargs)
+        self.num_labels = num_labels  # Register number of labels (output dimensions)
+class CustomViTModel(PreTrainedModel):
+    config_class = CustomViTConfig
+    def __init__(self, config, model_name="google/vit-base-patch16-224",
+                 num_classes=2, train_final_layer_only=False):
+        super().__init__(config)
+        # Load pre-trained ViT model from Hugging Face
+        self.vit = AutoModelForImageClassification.from_pretrained(model_name)
+        # Access the input features of the existing classifier
+        in_features = self.vit.classifier.in_features
+        # Modify the classifier layer to match the number of output classes
+        self.vit.classifier = nn.Linear(in_features, num_classes)
+        # Freeze previous weights if only training the final layer
+        if train_final_layer_only:
+            for name, param in self.vit.named_parameters():
+                if "classifier" not in name:
+                    param.requires_grad = False
+                else:
+                    print(f"Unfrozen layer: {name}")
+    def forward(self, x):
+        return self.vit(x)
+    @classmethod
+    def from_pretrained(cls, repo_id, model_name="google/vit-base-patch16-224", **kwargs):
+        # Attempt to download the safetensors model file
+        try:
+            model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
+            state_dict = load_file(model_path)
+        except Exception as e:
+            raise ValueError(
+                f"Failed to download or load 'model.safetensors' from {repo_id}. Ensure the file exists."
+            ) from e
+        # Download config.json from Hugging Face Hub
+        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")
+        # Load configuration
+        config = CustomViTConfig.from_pretrained(config_path)
+        # Create the model
+        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)
+        # Load the state_dict into the model
+        model.load_state_dict(state_dict)
+        return model
+```
+Now load the fine-tuned model weights from the Hugging Face Hub.
+```python
+from transformers import AutoModelForImageClassification
+import torch
+from sklearn.metrics import mean_absolute_error, mean_squared_error
+import matplotlib.pyplot as plt
+import numpy as np
+```
+```python
+#resnet
+resnet = CustomResNetModel.from_pretrained(
+    "final-project-5190/model-resnet-50-base",
+    model_name="microsoft/resnet-50"
+)
+```
+```python
+#convnext
+convnext=CustomConvNeXtModel.from_pretrained(
+    "final-project-5190/model-convnext-tiny-reducePlateau",
+    model_name="facebook/convnext-tiny-224")
+```
+```python
+#vit
+vit = CustomViTModel.from_pretrained(
+    "final-project-5190/model-ViT-base",
+    model_name="google/vit-base-patch16-224"
+)
+```
+```python
+#efficientnet
+efficientnet = CustomEfficientNetModel.from_pretrained(
+    "final-project-5190/model-efficientnet-b0-base",
+    model_name="google/efficientnet-b0"
+)
+```
+#### For data loading
+```python
+# Download
+from datasets import load_dataset, Image
+```
+```python
+import torch
+import torch.nn as nn
+import torchvision.models as models
+import torchvision.transforms as transforms
+from torch.utils.data import DataLoader, Dataset
+from transformers import AutoImageProcessor, AutoModelForImageClassification, AutoConfig
+from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
+from PIL import Image
+import os
+import numpy as np
+class GPSImageDataset(Dataset):
+    def __init__(self, hf_dataset, transform=None, lat_mean=None, lat_std=None, lon_mean=None, lon_std=None):
+        self.hf_dataset = hf_dataset
+        self.transform = transform
+        # Compute mean and std from the dataframe if not provided
+        self.latitude_mean = lat_mean if lat_mean is not None else np.mean(np.array(self.hf_dataset['Latitude']))
+        self.latitude_std = lat_std if lat_std is not None else np.std(np.array(self.hf_dataset['Latitude']))
+        self.longitude_mean = lon_mean if lon_mean is not None else np.mean(np.array(self.hf_dataset['Longitude']))
+        self.longitude_std = lon_std if lon_std is not None else np.std(np.array(self.hf_dataset['Longitude']))
+    def __len__(self):
+        return len(self.hf_dataset)
+    def __getitem__(self, idx):
+        # Extract data
+        example = self.hf_dataset[idx]
+        # Load and process the image
+        image = example['image']
+        latitude = example['Latitude']
+        longitude = example['Longitude']
+        # image = image.rotate(-90, expand=True)
+        if self.transform:
+            image = self.transform(image)
+        # Normalize GPS coordinates
+        latitude = (latitude - self.latitude_mean) / self.latitude_std
+        longitude = (longitude - self.longitude_mean) / self.longitude_std
+        gps_coords = torch.tensor([latitude, longitude], dtype=torch.float32)
+        return image, gps_coords
+```
+```python
+# Dataloader + Visualize
+transform = transforms.Compose([
+    transforms.RandomResizedCrop(224),  # Random crop and resize to 224x224
+    transforms.RandomHorizontalFlip(),  # Random horizontal flip
+    # transforms.RandomRotation(degrees=15),  # Random rotation between -15 and 15 degrees
+    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Random color jitter
+    # transforms.GaussianBlur(kernel_size=(3, 5), sigma=(0.1, 2.0)),
+    # transforms.RandomPerspective(distortion_scale=0.5, p=0.5),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                         std=[0.229, 0.224, 0.225])
+])
+# Optionally, you can create a separate transform for inference without augmentations
+inference_transform = transforms.Compose([
+    transforms.Resize((224, 224)),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                         std=[0.229, 0.224, 0.225])
+])
+```
+Here's an example of testing the ensemble on the released test set. To evaluate on other data, change the line that loads `release_data` below and run the rest of the code to obtain the RMSE.
+```python
+# Load test data
+release_data = load_dataset("gydou/released_img", split="train")
+```
+```python
+# Create dataset and dataloader using training mean and std
+rel_dataset = GPSImageDataset(
+    hf_dataset=release_data,
+    transform=inference_transform,
+    lat_mean=lat_mean,
+    lat_std=lat_std,
+    lon_mean=lon_mean,
+    lon_std=lon_std
+)
+rel_dataloader = DataLoader(rel_dataset, batch_size=32, shuffle=False)
+```
+```python
+# Ensemble members; presumably paired positionally with `weights` below.
+# NOTE(review): this name shadows the earlier `import torchvision.models as models`.
+models = [convnext, resnet, vit, efficientnet]
+# NOTE(review): these weights sum to 1.01 rather than 1 -- confirm that the
+# ensemble helper normalizes them.
+weights = [0.28, 0.26, 0.20, 0.27] # based on val 1/RMSE
+```
+```python
+# Release
+# Initialize lists to store predictions and actual values
+all_preds = []
+all_actuals = []
+# Move models to device and set them to evaluation mode
+for model in models:
+    model.to(device)
+    model.eval()
+# Perform inference on release dataset
+with torch.no_grad():
+    for images, gps_coords in rel_dataloader:
+        images, gps_coords = images.to(device), gps_coords.to(device)
+        # Weighted ensemble prediction
+        ensemble_logits = weighted_ensemble_predict(models, weights, images)
+        # Denormalize predictions and actual values
+        preds = ensemble_logits.cpu() * torch.tensor([lat_std, lon_std]) + torch.tensor([lat_mean, lon_mean])
+        actuals = gps_coords.cpu() * torch.tensor([lat_std, lon_std]) + torch.tensor([lat_mean, lon_mean])
+        all_preds.append(preds)
+        all_actuals.append(actuals)
+# Concatenate all batches
+all_preds = torch.cat(all_preds).numpy()
+all_actuals = torch.cat(all_actuals).numpy()
+# Compute error metrics
+mae = mean_absolute_error(all_actuals, all_preds)
+rmse = mean_squared_error(all_actuals, all_preds, squared=False)
+print(f'Release Dataset Mean Absolute Error: {mae}')
+print(f'Release Dataset Root Mean Squared Error: {rmse}')
+# Convert predictions and actuals to meters
+latitude_mean_radians = np.radians(lat_mean)  # Convert to radians for cosine
+meters_per_degree_latitude = 111000  # Constant
+meters_per_degree_longitude = 111000 * np.cos(latitude_mean_radians)  # Adjusted for latitude mean
+all_preds_meters = all_preds.copy()
+all_preds_meters[:, 0] *= meters_per_degree_latitude  # Latitude to meters
+all_preds_meters[:, 1] *= meters_per_degree_longitude  # Longitude to meters
+all_actuals_meters = all_actuals.copy()
+all_actuals_meters[:, 0] *= meters_per_degree_latitude  # Latitude to meters
+all_actuals_meters[:, 1] *= meters_per_degree_longitude  # Longitude to meters
+# Compute error metrics in meters
+mae_meters = mean_absolute_error(all_actuals_meters, all_preds_meters)
+rmse_meters = mean_squared_error(all_actuals_meters, all_preds_meters, squared=False)
+print(f"Mean Absolute Error (meters): {mae_meters:.2f}")
+print(f"Root Mean Squared Error (meters): {rmse_meters:.2f}")
+```
+After running inference on the release test set, our results are the following.
+- Release Dataset Mean Absolute Error: 0.0004267849560326909
+- Release Dataset Root Mean Squared Error: 0.0005247778631268114
+- Mean Absolute Error (meters): 41.90
+- Root Mean Squared Error (meters): 51.29