|
This document contains the instructions for running and evaluating model 2.
|
|
|
### Training data mean and std |
|
lat_mean: 39.95156937654321 |
|
lat_std: 0.0005992518588323268 |
|
lon_mean: -75.19136795987654 |
|
lon_std: 0.0007030395253318959 |
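
These statistics were used to normalize the latitude/longitude targets during training, so model outputs must be denormalized before use. A minimal sketch of the conversion (the variable names are illustrative):

```python
# Convert a normalized model output back to degrees.
lat_degrees = pred_lat_norm * lat_std + lat_mean
lon_degrees = pred_lon_norm * lon_std + lon_mean
```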
|
|
|
|
|
### Instruction to run and test the model |
|
|
|
First, run the relevant imports and define the training statistics:
|
```python |
|
import os

import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset

from datasets import load_dataset
from transformers import AutoImageProcessor, AutoModelForImageClassification, PretrainedConfig
from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
from PIL import Image

# Training data statistics (see above), used to normalize/denormalize coordinates
lat_mean = 39.95156937654321
lat_std = 0.0005992518588323268
lon_mean = -75.19136795987654
lon_std = 0.0007030395253318959
|
``` |
|
|
|
Our model uses the `CustomResNetModel` class. To use the model, first run the class definition below.
|
```python |
|
from transformers import PretrainedConfig


class CustomResNetConfig(PretrainedConfig):
    model_type = "custom-resnet"

    def __init__(self, num_labels=2, **kwargs):
        super().__init__(**kwargs)
        self.num_labels = num_labels


class CustomResNetModel(nn.Module, PyTorchModelHubMixin):
    config_class = CustomResNetConfig

    def __init__(self, model_name="microsoft/resnet-18",
                 num_classes=2,
                 train_final_layer_only=False):
        super().__init__()

        # Load pre-trained ResNet model from Hugging Face
        self.resnet = AutoModelForImageClassification.from_pretrained(model_name)

        # Access the Linear layer within the Sequential classifier
        in_features = self.resnet.classifier[1].in_features

        # Replace the classifier with a head that has the desired number of outputs
        self.resnet.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(128, num_classes)
        )

        self.config = CustomResNetConfig(num_labels=num_classes)

        # Optionally freeze the backbone weights and train only the new head
        if train_final_layer_only:
            for name, param in self.resnet.named_parameters():
                if "classifier" not in name:
                    param.requires_grad = False
                else:
                    print(f"Unfrozen layer: {name}")

    def forward(self, x):
        return self.resnet(x)

    def save_pretrained(self, save_directory, **kwargs):
        """Save model weights and custom configuration in Hugging Face format."""
        os.makedirs(save_directory, exist_ok=True)

        # Save model weights
        torch.save(self.state_dict(), os.path.join(save_directory, "pytorch_model.bin"))

        # Save configuration
        self.config.save_pretrained(save_directory)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="microsoft/resnet-18", **kwargs):
        """Load model weights and configuration from the Hugging Face Hub or a local directory."""
        # Download pytorch_model.bin from the Hugging Face Hub
        model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")

        # Download config.json from the Hugging Face Hub
        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

        # Load configuration
        config = CustomResNetConfig.from_pretrained(config_path)

        # Create the model
        model = cls(model_name=model_name, num_classes=config.num_labels)

        # Load state_dict
        model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))

        return model
|
|
|
|
|
``` |
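
If you would rather fine-tune from the backbone than load our released weights, the wrapper can also be instantiated directly. A minimal sketch (the training loop itself is not shown in this README):

```python
# Build the wrapper around a ResNet backbone and train only the new head
# (all other layers frozen). This mirrors the constructor defined above.
model = CustomResNetModel(model_name="microsoft/resnet-50",
                          num_classes=2,
                          train_final_layer_only=True)
```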
|
|
|
Then load the model weights from our Hugging Face repo.
|
```python |
|
REPO_MODEL_NAME = "final-project-5190/model-2"
BACKBONE_MODEL_NAME = "microsoft/resnet-50"

model = CustomResNetModel.from_pretrained(REPO_MODEL_NAME, model_name=BACKBONE_MODEL_NAME)
|
``` |
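
The inference example below references a `GPSImageDataset` wrapper and an `inference_transform` that are not defined in this README. A minimal sketch is given here so the example is self-contained; the dataset column names (`image`, `Latitude`, `Longitude`) and the ImageNet-style preprocessing are assumptions, so adjust them if the actual training pipeline differs:

```python
# Sketch of the dataset wrapper assumed by the inference example below.
# Column names and preprocessing are assumptions, not the exact training code.
class GPSImageDataset(Dataset):
    def __init__(self, hf_dataset, transform=None,
                 lat_mean=None, lat_std=None, lon_mean=None, lon_std=None):
        self.hf_dataset = hf_dataset
        self.transform = transform
        self.lat_mean, self.lat_std = lat_mean, lat_std
        self.lon_mean, self.lon_std = lon_mean, lon_std

    def __len__(self):
        return len(self.hf_dataset)

    def __getitem__(self, idx):
        example = self.hf_dataset[idx]
        image = example["image"].convert("RGB")  # assumed image column
        if self.transform:
            image = self.transform(image)
        # Normalize coordinates with the training statistics
        lat = (example["Latitude"] - self.lat_mean) / self.lat_std    # assumed column
        lon = (example["Longitude"] - self.lon_mean) / self.lon_std   # assumed column
        gps = torch.tensor([lat, lon], dtype=torch.float32)
        return image, gps


# ImageNet-style preprocessing for the ResNet backbone (an assumption; use the
# transform from training if it differs)
inference_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
```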
|
|
|
Now use the model for inference. Here is an example we ran on the release dataset. |
|
```python |
|
# Load the release (test) data
release_data = load_dataset("gydou/released_img", split="train")

# Create dataset and dataloader using the training mean and std
rel_dataset = GPSImageDataset(
    hf_dataset=release_data,
    transform=inference_transform,
    lat_mean=lat_mean,
    lat_std=lat_std,
    lon_mean=lon_mean,
    lon_std=lon_std
)
rel_dataloader = DataLoader(rel_dataset, batch_size=32, shuffle=False)

# Error metrics: MAE and RMSE
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Ensure the model is on the correct device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Initialize lists to store predictions and actual values
all_preds = []
all_actuals = []

model.eval()
with torch.no_grad():
    for images, gps_coords in rel_dataloader:
        images, gps_coords = images.to(device), gps_coords.to(device)

        # Forward pass
        outputs = model(images)

        # Extract logits (predictions)
        logits = outputs.logits  # use .logits to get the tensor

        # Denormalize predictions and actual values
        preds = logits.cpu() * torch.tensor([lat_std, lon_std]) + torch.tensor([lat_mean, lon_mean])
        actuals = gps_coords.cpu() * torch.tensor([lat_std, lon_std]) + torch.tensor([lat_mean, lon_mean])

        all_preds.append(preds)
        all_actuals.append(actuals)

# Concatenate all batches
all_preds = torch.cat(all_preds).numpy()
all_actuals = torch.cat(all_actuals).numpy()

# Compute error metrics in degrees
mae = mean_absolute_error(all_actuals, all_preds)
rmse = mean_squared_error(all_actuals, all_preds, squared=False)

print(f'Release Dataset Mean Absolute Error: {mae}')
print(f'Release Dataset Root Mean Squared Error: {rmse}')

# Convert predictions and actuals to meters
latitude_mean_radians = np.radians(lat_mean)  # convert to radians for the cosine
meters_per_degree_latitude = 111000  # approximate meters per degree of latitude
meters_per_degree_longitude = 111000 * np.cos(latitude_mean_radians)  # adjusted for the mean latitude

all_preds_meters = all_preds.copy()
all_preds_meters[:, 0] *= meters_per_degree_latitude   # latitude to meters
all_preds_meters[:, 1] *= meters_per_degree_longitude  # longitude to meters

all_actuals_meters = all_actuals.copy()
all_actuals_meters[:, 0] *= meters_per_degree_latitude   # latitude to meters
all_actuals_meters[:, 1] *= meters_per_degree_longitude  # longitude to meters

# Compute error metrics in meters
mae_meters = mean_absolute_error(all_actuals_meters, all_preds_meters)
rmse_meters = mean_squared_error(all_actuals_meters, all_preds_meters, squared=False)

print(f"Mean Absolute Error (meters): {mae_meters:.2f}")
print(f"Root Mean Squared Error (meters): {rmse_meters:.2f}")
|
``` |
|
|
|
After running inference, the following results are printed:
|
``` |
|
Release Dataset Mean Absolute Error: 0.00046400768003540093 |
|
Release Dataset Root Mean Squared Error: 0.0005684648079729969 |
|
Mean Absolute Error (meters): 45.92 |
|
Root Mean Squared Error (meters): 56.18 |
|
``` |
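
As a quick sanity check, the loaded model can also be applied to a single image. A minimal sketch (the file path is hypothetical, and `inference_transform` is the transform sketched above):

```python
# Predict coordinates for one local image (path is hypothetical)
img = Image.open("example.jpg").convert("RGB")
x = inference_transform(img).unsqueeze(0).to(device)

model.eval()
with torch.no_grad():
    pred = model(x).logits.cpu().squeeze(0)

# Denormalize back to degrees using the training statistics
lat = pred[0].item() * lat_std + lat_mean
lon = pred[1].item() * lon_std + lon_mean
print(f"Predicted location: ({lat:.6f}, {lon:.6f})")
```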