---
library_name: transformers
tags: []
---

# OLD MODEL: DO NOT USE FOR LEADERBOARD

# Model Card for ViT_Attempt_1

This is a fine-tuned Vision Transformer (ViT) model from Google. The pre-trained checkpoint was loaded and fine-tuned on the collected training data to predict GPS coordinates (latitude and longitude) from images.

Base checkpoint: https://huggingface.co/google/vit-base-patch16-224-in21k

The model's two outputs are standardized latitude and longitude; denormalize them with these statistics:

- `lat_mean = 39.95164939753852`
- `lat_std = 0.0007290994359226359`
- `lon_mean = -75.191420541785`
- `lon_std = 0.000733160718757529`

```python
import torch
from transformers import AutoConfig, AutoModelForImageClassification

# Normalization statistics used for the GPS targets during training
lat_mean = 39.95164939753852
lat_std = 0.0007290994359226359
lon_mean = -75.191420541785
lon_std = 0.000733160718757529

model_name = "AppliedMLReedShreya/ViT_Attempt_1"
config = AutoConfig.from_pretrained(model_name)
config.num_labels = 2  # We need two outputs: latitude and longitude

# Load the fine-tuned ViT model (the classification head is used as a 2-output regressor)
vit_model = AutoModelForImageClassification.from_pretrained(model_name, config=config)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
vit_model = vit_model.to(device)

# Initialize lists to store predictions and actual values
all_preds = []
all_actuals = []

vit_model.eval()
with torch.no_grad():
    # `val_dataloader` is assumed to yield (images, gps_coords) batches
    # with coordinates standardized using the statistics above
    for images, gps_coords in val_dataloader:
        images, gps_coords = images.to(device), gps_coords.to(device)

        outputs = vit_model(images).logits

        # Denormalize predictions and actual values back to degrees
        preds = outputs.cpu() * torch.tensor([lat_std, lon_std]) + torch.tensor([lat_mean, lon_mean])
        actuals = gps_coords.cpu() * torch.tensor([lat_std, lon_std]) + torch.tensor([lat_mean, lon_mean])

        all_preds.append(preds)
        all_actuals.append(actuals)

# Concatenate all batches into single (N, 2) arrays
all_preds = torch.cat(all_preds).numpy()
all_actuals = torch.cat(all_actuals).numpy()
```
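For single-image inference, the card does not show the preprocessing pipeline; a minimal sketch, assuming the base checkpoint's default image processor matches the one used in training, and reusing `vit_model`, `device`, and the normalization statistics from the snippet above (the image path is hypothetical):

```python
from PIL import Image
from transformers import AutoImageProcessor

# Assumption: preprocessing follows the base checkpoint's default processor
processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")

image = Image.open("example.jpg")  # hypothetical input image
inputs = processor(images=image, return_tensors="pt").to(device)

with torch.no_grad():
    logits = vit_model(**inputs).logits.cpu()

# Denormalize the two outputs back to degrees
lat = logits[0, 0].item() * lat_std + lat_mean
lon = logits[0, 1].item() * lon_std + lon_mean
print(f"Predicted location: ({lat:.6f}, {lon:.6f})")
```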
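To quantify prediction error in physical units, one option (not part of the card) is the great-circle distance between predicted and actual coordinates. A sketch using the `all_preds` and `all_actuals` arrays produced above, with a hypothetical `haversine_m` helper:

```python
import numpy as np

def haversine_m(pred: np.ndarray, actual: np.ndarray) -> np.ndarray:
    """Great-circle distance in meters between (lat, lon) rows given in degrees."""
    R = 6371000.0  # mean Earth radius in meters
    lat1, lon1 = np.radians(pred[:, 0]), np.radians(pred[:, 1])
    lat2, lon2 = np.radians(actual[:, 0]), np.radians(actual[:, 1])
    dlat, dlon = lat2 - lat1, lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    return 2 * R * np.arcsin(np.sqrt(a))

errors_m = haversine_m(all_preds, all_actuals)
print(f"Mean error:   {errors_m.mean():.2f} m")
print(f"Median error: {np.median(errors_m):.2f} m")
```

Given the very small standard deviations above, the data covers a small geographic area, so meter-scale distances are a more interpretable error measure than raw degree differences.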