Update README

002bd9d about 2 months ago

18.3 kB

	### Relevant imports & set up
	```python
	# Install the third-party packages used by the cells below in one quiet
	# call. `-q` suppresses pip's progress output directly, so no scratch file
	# has to be created and removed afterwards.
	!pip install -q geopy datasets torch torchvision huggingface_hub
	```

	```python
	!pip install transformers
	import transformers
	```

	```python
	# Log in to the Hugging Face Hub; replace [your_token] with your own access token.
	!huggingface-cli login --token [your_token]
	```

	```python
	# Mean / standard deviation of the training coordinates. They are used to
	# normalize the GPS targets and to denormalize the model predictions.
	lat_mean, lat_std = 39.95156937654321, 0.0005992518588323268
	lon_mean, lon_std = -75.19136795987654, 0.0007030395253318959
	```

	### Instructions
	Our current best performing model is an ensemble of multiple models. To run it on hidden test data, first run the model definitions.

	#### Load and define models

	```python
	from transformers import AutoModelForImageClassification, PretrainedConfig, PreTrainedModel
	import torch
	import torch.nn as nn
	import os
	from huggingface_hub import PyTorchModelHubMixin, hf_hub_download

	class CustomConvNeXtConfig(PretrainedConfig):
	    """Configuration object for ``CustomConvNeXtModel``.

	    Args:
	        num_labels: Number of output dimensions of the model head.
	        **kwargs: Passed through to ``PretrainedConfig.__init__``.
	    """

	    model_type = "custom-convnext"

	    def __init__(self, num_labels=2, **kwargs):
	        super().__init__(**kwargs)
	        # Record the number of labels (output dimensions) on the config.
	        self.num_labels = num_labels

	class CustomConvNeXtModel(PreTrainedModel):
	    """ConvNeXt image model whose classifier is replaced by a new linear head."""

	    config_class = CustomConvNeXtConfig

	    def __init__(self, config, model_name="facebook/convnext-tiny-224",
	                 num_classes=2, train_final_layer_only=False):
	        """Build the backbone and swap in a ``num_classes``-wide linear head.

	        Args:
	            config: ``CustomConvNeXtConfig`` handed to ``PreTrainedModel``.
	            model_name: Checkpoint passed to
	                ``AutoModelForImageClassification.from_pretrained``.
	            num_classes: Output width of the replacement classifier.
	            train_final_layer_only: When true, every parameter whose name
	                does not contain "classifier" has ``requires_grad`` disabled.
	        """
	        super().__init__(config)

	        # Pre-trained backbone; its classifier is replaced right below.
	        backbone = AutoModelForImageClassification.from_pretrained(model_name)
	        head_width = backbone.classifier.in_features
	        backbone.classifier = nn.Linear(head_width, num_classes)
	        self.convnext = backbone

	        if train_final_layer_only:
	            for name, param in self.convnext.named_parameters():
	                if "classifier" in name:
	                    print(f"Unfrozen layer: {name}")
	                else:
	                    param.requires_grad = False

	    def forward(self, x):
	        """Return the wrapped ConvNeXt model's output for ``x``."""
	        return self.convnext(x)

	    @classmethod
	    def from_pretrained(cls, repo_id, model_name="facebook/convnext-tiny-224", **kwargs):
	        """Load model weights and configuration from Hugging Face Hub.

	        Downloads ``model.safetensors`` and ``config.json`` from ``repo_id``,
	        builds the model with ``config.num_labels`` outputs and loads the
	        downloaded state dict into it. ``**kwargs`` is accepted but unused.
	        """
	        weights_file = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
	        config_file = hf_hub_download(repo_id=repo_id, filename="config.json")

	        config = CustomConvNeXtConfig.from_pretrained(config_file)
	        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)

	        # Safetensors is imported locally, right where the file is read.
	        from safetensors.torch import load_file
	        model.load_state_dict(load_file(weights_file))

	        return model


	class CustomResNetConfig(PretrainedConfig):
	    """Configuration object for ``CustomResNetModel``.

	    Args:
	        num_labels: Number of output dimensions of the model head.
	        **kwargs: Passed through to ``PretrainedConfig.__init__``.
	    """

	    model_type = "custom-resnet"

	    def __init__(self, num_labels=2, **kwargs):
	        super().__init__(**kwargs)
	        # Record the number of labels (output dimensions) on the config.
	        self.num_labels = num_labels

	class CustomResNetModel(nn.Module, PyTorchModelHubMixin):
	    """ResNet image model whose classifier is replaced by Flatten + Linear."""

	    config_class = CustomResNetConfig

	    def __init__(self, model_name="microsoft/resnet-18",
	                 num_classes=2,
	                 train_final_layer_only=False):
	        """Build the backbone and swap in a ``num_classes``-wide head.

	        Args:
	            model_name: Checkpoint passed to
	                ``AutoModelForImageClassification.from_pretrained``.
	            num_classes: Output width of the replacement linear layer.
	            train_final_layer_only: When true, every parameter whose name
	                does not contain "classifier" has ``requires_grad`` disabled.
	        """
	        super().__init__()

	        # Load pre-trained ResNet model from Hugging Face
	        self.resnet = AutoModelForImageClassification.from_pretrained(model_name)

	        # The existing classifier is a Sequential; index 1 is its Linear layer.
	        in_features = self.resnet.classifier[1].in_features

	        # Replace the classifier so it emits the desired number of outputs.
	        self.resnet.classifier = nn.Sequential(
	            nn.Flatten(),
	            nn.Linear(in_features, num_classes)
	        )

	        self.config = CustomResNetConfig(num_labels=num_classes)

	        # Freeze previous weights
	        if train_final_layer_only:
	            for name, param in self.resnet.named_parameters():
	                if "classifier" not in name:
	                    param.requires_grad = False
	                else:
	                    print(f"Unfrozen layer: {name}")

	    def forward(self, x):
	        """Return the wrapped ResNet model's output for ``x``."""
	        return self.resnet(x)

	    def save_pretrained(self, save_directory, **kwargs):
	        """Save model weights and custom configuration in Hugging Face format.

	        Writes ``pytorch_model.bin`` (the state dict) and the configuration
	        into ``save_directory``, creating the directory if needed.
	        """
	        os.makedirs(save_directory, exist_ok=True)

	        # Save model weights
	        torch.save(self.state_dict(), os.path.join(save_directory, "pytorch_model.bin"))

	        # Save configuration
	        self.config.save_pretrained(save_directory)

	    @classmethod
	    def from_pretrained(cls, repo_id, model_name="microsoft/resnet-18", **kwargs):
	        """Load model weights and configuration from Hugging Face Hub or local directory.

	        Args:
	            repo_id: Hub repository id, or the path of an existing local
	                directory containing ``pytorch_model.bin`` and ``config.json``
	                (the files ``save_pretrained`` writes).
	            model_name: Backbone checkpoint used to build the architecture.
	            **kwargs: Accepted for interface compatibility; unused.

	        Returns:
	            The model with the stored state dict loaded (mapped to CPU).
	        """
	        if os.path.isdir(repo_id):
	            # Local directory: read the files directly instead of downloading.
	            model_path = os.path.join(repo_id, "pytorch_model.bin")
	            config_path = os.path.join(repo_id, "config.json")
	        else:
	            # Download pytorch_model.bin and config.json from Hugging Face Hub
	            model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
	            config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

	        # Load configuration
	        config = CustomResNetConfig.from_pretrained(config_path)

	        # Create the model
	        model = cls(model_name=model_name, num_classes=config.num_labels)

	        # NOTE(review): ``pytorch_model.bin`` is a pickle-based checkpoint, so
	        # ``torch.load`` should only be pointed at repositories you trust;
	        # consider ``weights_only=True`` or a safetensors file instead.
	        model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))

	        return model


	class CustomEfficientNetConfig(PretrainedConfig):
	    """Configuration object for ``CustomEfficientNetModel``.

	    Args:
	        num_labels: Number of output dimensions of the model head.
	        **kwargs: Passed through to ``PretrainedConfig.__init__``.
	    """

	    model_type = "custom-efficientnet"

	    def __init__(self, num_labels=2, **kwargs):
	        super().__init__(**kwargs)
	        # Record the number of labels (output dimensions) on the config.
	        self.num_labels = num_labels

	class CustomEfficientNetModel(PreTrainedModel):
	    """EfficientNet image model whose classifier is replaced by a new head."""

	    config_class = CustomEfficientNetConfig

	    def __init__(self, config, model_name="google/efficientnet-b0",
	                 num_classes=2, train_final_layer_only=False):
	        """Build the backbone and swap in a ``num_classes``-wide head.

	        Args:
	            config: ``CustomEfficientNetConfig`` handed to ``PreTrainedModel``.
	            model_name: Checkpoint passed to
	                ``AutoModelForImageClassification.from_pretrained``.
	            num_classes: Output width of the replacement linear layer.
	            train_final_layer_only: When true, every parameter whose name
	                does not contain "classifier" has ``requires_grad`` disabled.
	        """
	        super().__init__(config)

	        # Load pre-trained EfficientNet model from Hugging Face
	        self.efficientnet = AutoModelForImageClassification.from_pretrained(model_name)

	        # Access the input features of the existing classifier
	        in_features = self.efficientnet.classifier.in_features

	        # The Linear layer stays wrapped in a Sequential, which determines
	        # the parameter names stored in the state dict.
	        self.efficientnet.classifier = nn.Sequential(
	            nn.Linear(in_features, num_classes)
	        )

	        # Freeze previous weights if only training the final layer
	        if train_final_layer_only:
	            for name, param in self.efficientnet.named_parameters():
	                if "classifier" not in name:
	                    param.requires_grad = False
	                else:
	                    print(f"Unfrozen layer: {name}")

	    def forward(self, x):
	        """Return the wrapped EfficientNet model's output for ``x``."""
	        return self.efficientnet(x)

	    @classmethod
	    def from_pretrained(cls, repo_id, model_name="google/efficientnet-b0", **kwargs):
	        """Load model weights and configuration from Hugging Face Hub.

	        Args:
	            repo_id: Hub repository holding ``model.safetensors`` and
	                ``config.json``.
	            model_name: Backbone checkpoint used to build the architecture.
	            **kwargs: Accepted for interface compatibility; unused.

	        Returns:
	            The model with the downloaded state dict loaded.

	        Raises:
	            ValueError: If ``model.safetensors`` cannot be downloaded or read.
	        """
	        # ``load_file`` is not among the imports at the top of this snippet,
	        # so bring it into scope here. Importing outside the ``try`` keeps a
	        # missing ``safetensors`` package from being reported as a missing
	        # file in the repository.
	        from safetensors.torch import load_file

	        # Attempt to download the safetensors model file
	        try:
	            model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
	            state_dict = load_file(model_path)
	        except Exception as e:
	            raise ValueError(
	                f"Failed to download or load 'model.safetensors' from {repo_id}. Ensure the file exists."
	            ) from e

	        # Download config.json from Hugging Face Hub
	        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

	        # Load configuration
	        config = CustomEfficientNetConfig.from_pretrained(config_path)

	        # Create the model
	        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)

	        # Load the state_dict into the model
	        model.load_state_dict(state_dict)

	        return model


	class CustomViTConfig(PretrainedConfig):
	    """Configuration object for ``CustomViTModel``.

	    Args:
	        num_labels: Number of output dimensions of the model head.
	        **kwargs: Passed through to ``PretrainedConfig.__init__``.
	    """

	    model_type = "custom-vit"

	    def __init__(self, num_labels=2, **kwargs):
	        super().__init__(**kwargs)
	        # Record the number of labels (output dimensions) on the config.
	        self.num_labels = num_labels

	class CustomViTModel(PreTrainedModel):
	    """ViT image model whose classifier is replaced by a new linear head."""

	    config_class = CustomViTConfig

	    def __init__(self, config, model_name="google/vit-base-patch16-224",
	                 num_classes=2, train_final_layer_only=False):
	        """Build the backbone and swap in a ``num_classes``-wide linear head.

	        Args:
	            config: ``CustomViTConfig`` handed to ``PreTrainedModel``.
	            model_name: Checkpoint passed to
	                ``AutoModelForImageClassification.from_pretrained``.
	            num_classes: Output width of the replacement classifier.
	            train_final_layer_only: When true, every parameter whose name
	                does not contain "classifier" has ``requires_grad`` disabled.
	        """
	        super().__init__(config)

	        # Load pre-trained ViT model from Hugging Face
	        self.vit = AutoModelForImageClassification.from_pretrained(model_name)

	        # Access the input features of the existing classifier
	        in_features = self.vit.classifier.in_features

	        # Modify the classifier layer to match the number of output classes
	        self.vit.classifier = nn.Linear(in_features, num_classes)

	        # Freeze previous weights if only training the final layer
	        if train_final_layer_only:
	            for name, param in self.vit.named_parameters():
	                if "classifier" not in name:
	                    param.requires_grad = False
	                else:
	                    print(f"Unfrozen layer: {name}")

	    def forward(self, x):
	        """Return the wrapped ViT model's output for ``x``."""
	        return self.vit(x)

	    @classmethod
	    def from_pretrained(cls, repo_id, model_name="google/vit-base-patch16-224", **kwargs):
	        """Load model weights and configuration from Hugging Face Hub.

	        Args:
	            repo_id: Hub repository holding ``model.safetensors`` and
	                ``config.json``.
	            model_name: Backbone checkpoint used to build the architecture.
	            **kwargs: Accepted for interface compatibility; unused.

	        Returns:
	            The model with the downloaded state dict loaded.

	        Raises:
	            ValueError: If ``model.safetensors`` cannot be downloaded or read.
	        """
	        # ``load_file`` is not among the imports at the top of this snippet,
	        # so bring it into scope here. Importing outside the ``try`` keeps a
	        # missing ``safetensors`` package from being reported as a missing
	        # file in the repository.
	        from safetensors.torch import load_file

	        # Attempt to download the safetensors model file
	        try:
	            model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
	            state_dict = load_file(model_path)
	        except Exception as e:
	            raise ValueError(
	                f"Failed to download or load 'model.safetensors' from {repo_id}. Ensure the file exists."
	            ) from e

	        # Download config.json from Hugging Face Hub
	        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

	        # Load configuration
	        config = CustomViTConfig.from_pretrained(config_path)

	        # Create the model
	        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)

	        # Load the state_dict into the model
	        model.load_state_dict(state_dict)

	        return model


	# Define the WeightedEnsembleModel class
	class WeightedEnsembleModel(nn.Module):
	    """Combine several models by a weighted sum of their logits."""

	    def __init__(self, models, weights):
	        """
	        Initialize the ensemble model with individual models and their weights.

	        Args:
	            models: Iterable of ``nn.Module`` members; wrapped in a ModuleList
	                so ``.to()`` / ``.eval()`` reach every member.
	            weights: One multiplier per member, applied as given (they are
	                not normalized).

	        Raises:
	            ValueError: If ``weights`` has a length that differs from the
	                number of models.
	        """
	        super().__init__()
	        self.models = nn.ModuleList(models)  # Wrap models in ModuleList
	        # A length mismatch would otherwise make ``zip`` drop members silently.
	        if hasattr(weights, "__len__") and len(weights) != len(self.models):
	            raise ValueError(
	                f"Expected one weight per model, got {len(weights)} weights "
	                f"for {len(self.models)} models."
	            )
	        self.weights = weights

	    def forward(self, images):
	        """
	        Forward pass for the ensemble model.

	        Each member is called on ``images``; its ``.logits`` attribute is used
	        when present, otherwise the raw output. Returns the weighted sum of
	        those logits, allocated on ``images.device``.
	        """
	        ensemble_logits = None
	        for model, weight in zip(self.models, self.weights):
	            outputs = model(images)
	            logits = outputs.logits if hasattr(outputs, "logits") else outputs  # Extract logits
	            if ensemble_logits is None:
	                # Size the accumulator from the first member's output rather
	                # than assuming exactly two output dimensions.
	                ensemble_logits = torch.zeros(logits.shape, device=images.device)
	            ensemble_logits += weight * logits  # Weighted sum of logits
	        if ensemble_logits is None:
	            # No members: keep the previous all-zero (batch, 2) result.
	            ensemble_logits = torch.zeros((images.size(0), 2), device=images.device)
	        return ensemble_logits



	```


	Now, load the model weights from the Hugging Face Hub.
	```python
	from transformers import AutoModelForImageClassification
	import torch
	from sklearn.metrics import mean_absolute_error, mean_squared_error
	import matplotlib.pyplot as plt
	import numpy as np

	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	```

	```python
	# ResNet-50 member
	resnet = CustomResNetModel.from_pretrained(
	    repo_id="final-project-5190/model-resnet-50-base",
	    model_name="microsoft/resnet-50",
	)

	# ConvNeXt-tiny member
	convnext = CustomConvNeXtModel.from_pretrained(
	    repo_id="final-project-5190/model-convnext-tiny-reducePlateau",
	    model_name="facebook/convnext-tiny-224",
	)

	# ViT-base member
	vit = CustomViTModel.from_pretrained(
	    repo_id="final-project-5190/model-ViT-base",
	    model_name="google/vit-base-patch16-224",
	)

	# EfficientNet-b0 member
	efficientnet = CustomEfficientNetModel.from_pretrained(
	    repo_id="final-project-5190/model-efficientnet-b0-base",
	    model_name="google/efficientnet-b0",
	)

	# Members and their ensemble weights, in matching order.
	# NOTE(review): these weights sum to 1.01, not 1.0 - confirm this is intended.
	models = [convnext, resnet, vit, efficientnet]
	weights = [0.28, 0.26, 0.20, 0.27]
	```



	#### For data loading
	```python
	# Download
	from datasets import load_dataset, Image
	```

	```python
	import torch
	import torch.nn as nn
	import torchvision.models as models
	import torchvision.transforms as transforms
	from torch.utils.data import DataLoader, Dataset
	from transformers import AutoImageProcessor, AutoModelForImageClassification, AutoConfig
	from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
	from PIL import Image
	import os
	import numpy as np

	class GPSImageDataset(Dataset):
	    """Dataset yielding ``(image, normalized_gps)`` pairs from ``hf_dataset``.

	    Rows are read through the 'image', 'Latitude' and 'Longitude' keys.
	    """

	    def __init__(self, hf_dataset, transform=None, lat_mean=None, lat_std=None, lon_mean=None, lon_std=None):
	        """Store the data source, the optional transform and the statistics.

	        Any statistic left as ``None`` is computed from the matching
	        'Latitude' / 'Longitude' column of ``hf_dataset``.
	        """
	        self.hf_dataset = hf_dataset
	        self.transform = transform

	        # Fall back to statistics of the data itself when none are supplied.
	        if lat_mean is None:
	            lat_mean = np.mean(np.array(self.hf_dataset['Latitude']))
	        if lat_std is None:
	            lat_std = np.std(np.array(self.hf_dataset['Latitude']))
	        if lon_mean is None:
	            lon_mean = np.mean(np.array(self.hf_dataset['Longitude']))
	        if lon_std is None:
	            lon_std = np.std(np.array(self.hf_dataset['Longitude']))

	        self.latitude_mean = lat_mean
	        self.latitude_std = lat_std
	        self.longitude_mean = lon_mean
	        self.longitude_std = lon_std

	    def __len__(self):
	        """Return the number of rows in the wrapped dataset."""
	        return len(self.hf_dataset)

	    def __getitem__(self, idx):
	        """Return the (optionally transformed) image and its normalized GPS target."""
	        record = self.hf_dataset[idx]

	        image = record['image']
	        raw_lat = record['Latitude']
	        raw_lon = record['Longitude']
	        # Disabled option: image = image.rotate(-90, expand=True)
	        if self.transform:
	            image = self.transform(image)

	        # Standardize both coordinates with the stored statistics.
	        norm_lat = (raw_lat - self.latitude_mean) / self.latitude_std
	        norm_lon = (raw_lon - self.longitude_mean) / self.longitude_std

	        return image, torch.tensor([norm_lat, norm_lon], dtype=torch.float32)
	```

	```python
	# Image transforms for the dataloaders.
	# Both pipelines end with the same tensor conversion and per-channel normalization.
	_normalize = transforms.Normalize(
	    mean=[0.485, 0.456, 0.406],
	    std=[0.229, 0.224, 0.225],
	)

	# Augmenting pipeline. Disabled options kept for reference:
	#   transforms.RandomRotation(degrees=15)  # Random rotation between -15 and 15 degrees
	#   transforms.GaussianBlur(kernel_size=(3, 5), sigma=(0.1, 2.0))
	#   transforms.RandomPerspective(distortion_scale=0.5, p=0.5)
	transform = transforms.Compose([
	    transforms.RandomResizedCrop(224),  # Random crop and resize to 224x224
	    transforms.RandomHorizontalFlip(),  # Random horizontal flip
	    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Random color jitter
	    transforms.ToTensor(),
	    _normalize,
	])

	# Inference pipeline without augmentations: resize, convert, normalize.
	inference_transform = transforms.Compose([
	    transforms.Resize((224, 224)),
	    transforms.ToTensor(),
	    _normalize,
	])
	```

	Here's an example of us testing the ensemble on the release test set. To evaluate on a different dataset, change the `release_data = load_dataset(...)` line below and run the rest of the code to obtain the RMSE.

	```python
	# Load test data: the "train" split of the "gydou/released_img" dataset.
	# Point this line at another dataset to evaluate the ensemble on it.
	release_data = load_dataset("gydou/released_img", split="train")
	```

	```python
	# Create dataset and dataloader using training mean and std
	# Wrap the released split with the augmentation-free transform and the
	# training mean / std so targets are normalized as during training.
	rel_dataset = GPSImageDataset(
	    release_data,
	    inference_transform,
	    lat_mean=lat_mean, lat_std=lat_std,
	    lon_mean=lon_mean, lon_std=lon_std,
	)
	# Fixed order, batches of 32.
	rel_dataloader = DataLoader(dataset=rel_dataset, batch_size=32, shuffle=False)
	```


	```python
	# ensemble
	# Rebuild the member list explicitly: the data-loading cell re-binds the name
	# `models` to the `torchvision.models` module, so passing `models` here would
	# hand the ensemble a module instead of the four networks. The order matches
	# the `weights` list.
	ensemble_members = [convnext, resnet, vit, efficientnet]
	ensemble_model = WeightedEnsembleModel(models=ensemble_members, weights=weights).to(device)


	def _rmse(y_true, y_pred):
	    """Return the RMSE of each output column, averaged over the columns.

	    This reproduces ``mean_squared_error(..., squared=False)`` without the
	    ``squared`` argument, which recent scikit-learn releases no longer accept.
	    """
	    per_output_mse = mean_squared_error(y_true, y_pred, multioutput="raw_values")
	    return np.mean(np.sqrt(per_output_mse))


	# Denormalization constants, built once instead of once per batch.
	gps_scale = torch.tensor([lat_std, lon_std])
	gps_offset = torch.tensor([lat_mean, lon_mean])

	# Validation
	all_preds = []
	all_actuals = []

	ensemble_model.eval()
	with torch.no_grad():
	    for images, gps_coords in rel_dataloader:
	        # Only the images are needed on the device; targets are used on CPU.
	        images = images.to(device)

	        # Weighted ensemble prediction using the new model
	        ensemble_logits = ensemble_model(images)

	        # Denormalize predictions and actual values
	        preds = ensemble_logits.cpu() * gps_scale + gps_offset
	        actuals = gps_coords.cpu() * gps_scale + gps_offset

	        all_preds.append(preds)
	        all_actuals.append(actuals)

	# Concatenate all batches
	all_preds = torch.cat(all_preds).numpy()
	all_actuals = torch.cat(all_actuals).numpy()

	# Compute error metrics
	mae = mean_absolute_error(all_actuals, all_preds)
	rmse = _rmse(all_actuals, all_preds)

	print(f'Mean Absolute Error: {mae}')
	print(f'Root Mean Squared Error: {rmse}')

	# Convert predictions and actuals to meters
	latitude_mean_radians = np.radians(lat_mean)  # Convert to radians for cosine
	meters_per_degree_latitude = 111000  # Constant
	meters_per_degree_longitude = 111000 * np.cos(latitude_mean_radians)  # Adjusted for latitude mean

	all_preds_meters = all_preds.copy()
	all_preds_meters[:, 0] *= meters_per_degree_latitude  # Latitude to meters
	all_preds_meters[:, 1] *= meters_per_degree_longitude  # Longitude to meters

	all_actuals_meters = all_actuals.copy()
	all_actuals_meters[:, 0] *= meters_per_degree_latitude  # Latitude to meters
	all_actuals_meters[:, 1] *= meters_per_degree_longitude  # Longitude to meters

	# Compute error metrics in meters
	mae_meters = mean_absolute_error(all_actuals_meters, all_preds_meters)
	rmse_meters = _rmse(all_actuals_meters, all_preds_meters)

	print(f"Mean Absolute Error (meters): {mae_meters:.2f}")
	print(f"Root Mean Squared Error (meters): {rmse_meters:.2f}")


	```

	After running inference on the release test set, our results are the following.
	- Release Dataset Mean Absolute Error: 0.0004267849560326909
	- Release Dataset Root Mean Squared Error: 0.0005247778631268114
	- Mean Absolute Error (meters): 41.90
	- Root Mean Squared Error (meters): 51.29