lling0212 commited on
Commit
d5aa7dd
·
1 Parent(s): e804e6b
Files changed (1) hide show
  1. README copy.md +0 -514
README copy.md DELETED
@@ -1,514 +0,0 @@
1
- ### Relevant imports & set up
2
- ```python
3
- !pip install geopy > delete.txt
4
- !pip install datasets > delete.txt
5
- !pip install torch torchvision datasets > delete.txt
6
- !pip install huggingface_hub > delete.txt
7
- !rm delete.txt
8
- ```
9
-
10
- ```python
11
- !pip install transformers
12
- import transformers
13
- ```
14
-
15
- ```python
16
- !huggingface-cli login --token [your_token]
17
- ```
18
-
19
- ```python
20
# Mean / standard deviation of the latitude and longitude values. They are
# passed to GPSImageDataset to normalize the GPS targets and reused afterwards
# to map the ensemble's predictions back to degrees.
lat_mean, lat_std = 39.95156937654321, 0.0005992518588323268
lon_mean, lon_std = -75.19136795987654, 0.0007030395253318959
24
- ```
25
-
26
- ### Instructions
27
- Our current best performing model is an ensemble of multiple models. To run it on hidden test data, first run the model definitions.
28
-
29
- #### Load and define models
30
-
31
- ```python
32
- from transformers import AutoModelForImageClassification, PretrainedConfig, PreTrainedModel
33
- import torch
34
- import torch.nn as nn
35
- import os
36
- from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
37
-
38
class CustomConvNeXtConfig(PretrainedConfig):
    """Configuration for ``CustomConvNeXtModel``.

    Adds a single option on top of ``PretrainedConfig``: ``num_labels``, the
    number of output dimensions of the model head.
    """

    model_type = "custom-convnext"

    def __init__(self, num_labels=2, **kwargs):
        # The base class consumes the generic options first; the output size
        # is recorded afterwards.
        super().__init__(**kwargs)
        self.num_labels = num_labels
44
-
45
class CustomConvNeXtModel(PreTrainedModel):
    """ConvNeXt image model whose classifier is replaced by a new linear head."""

    config_class = CustomConvNeXtConfig

    def __init__(self, config, model_name="facebook/convnext-tiny-224",
                 num_classes=2, train_final_layer_only=False):
        """Build the backbone and attach a ``num_classes``-wide linear head.

        Args:
            config: ``CustomConvNeXtConfig`` handed to ``PreTrainedModel``.
            model_name: Hub id of the pre-trained ConvNeXt checkpoint.
            num_classes: Number of outputs of the replacement classifier.
            train_final_layer_only: When true, every parameter whose name does
                not contain ``"classifier"`` is frozen.
        """
        super().__init__(config)

        # Pre-trained backbone from the Hugging Face Hub.
        self.convnext = AutoModelForImageClassification.from_pretrained(model_name)

        # Replace the stock classifier, keeping its input width.
        head_inputs = self.convnext.classifier.in_features
        self.convnext.classifier = nn.Linear(head_inputs, num_classes)

        if not train_final_layer_only:
            return

        # Freeze everything except the new head, reporting what stays trainable.
        for name, param in self.convnext.named_parameters():
            if "classifier" in name:
                print(f"Unfrozen layer: {name}")
            else:
                param.requires_grad = False

    def forward(self, x):
        """Run ``x`` through the wrapped ConvNeXt model and return its output."""
        return self.convnext(x)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="facebook/convnext-tiny-224", **kwargs):
        """Load model weights and configuration from Hugging Face Hub."""
        # Fetch the weight file first, then the configuration.
        weights_file = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
        config_file = hf_hub_download(repo_id=repo_id, filename="config.json")

        config = CustomConvNeXtConfig.from_pretrained(config_file)

        # Head size comes from the stored configuration.
        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)

        # Imported lazily so safetensors is only needed when loading weights.
        from safetensors.torch import load_file
        model.load_state_dict(load_file(weights_file))

        return model
93
-
94
-
95
class CustomResNetConfig(PretrainedConfig):
    """Configuration for ``CustomResNetModel``.

    Adds a single option on top of ``PretrainedConfig``: ``num_labels``, the
    number of output dimensions of the model head.
    """

    model_type = "custom-resnet"

    def __init__(self, num_labels=2, **kwargs):
        # The base class consumes the generic options first; the output size
        # is recorded afterwards.
        super().__init__(**kwargs)
        self.num_labels = num_labels
101
-
102
class CustomResNetModel(nn.Module, PyTorchModelHubMixin):
    """ResNet image model whose classifier is replaced by a new linear head."""

    config_class = CustomResNetConfig

    def __init__(self, model_name="microsoft/resnet-18",
                 num_classes=2,
                 train_final_layer_only=False):
        """Build the backbone and attach a ``num_classes``-wide head.

        Args:
            model_name: Hub id of the pre-trained ResNet checkpoint.
            num_classes: Number of outputs of the replacement classifier.
            train_final_layer_only: When true, every parameter whose name does
                not contain ``"classifier"`` is frozen.
        """
        super().__init__()

        # Load pre-trained ResNet model from Hugging Face
        self.resnet = AutoModelForImageClassification.from_pretrained(model_name)

        # The stock classifier is a Sequential; index 1 is its Linear layer.
        in_features = self.resnet.classifier[1].in_features

        # Replace the classifier with one producing the desired number of outputs
        self.resnet.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, num_classes),
        )

        self.config = CustomResNetConfig(num_labels=num_classes)

        # Freeze everything except the new head
        if train_final_layer_only:
            for name, param in self.resnet.named_parameters():
                if "classifier" not in name:
                    param.requires_grad = False
                else:
                    print(f"Unfrozen layer: {name}")

    def forward(self, x):
        """Run ``x`` through the wrapped ResNet model and return its output."""
        return self.resnet(x)

    def save_pretrained(self, save_directory, **kwargs):
        """Save model weights and custom configuration in Hugging Face format.

        Writes ``pytorch_model.bin`` (the state dict) and the configuration
        into ``save_directory``, creating the directory if needed.
        """
        os.makedirs(save_directory, exist_ok=True)

        # Save model weights
        torch.save(self.state_dict(), os.path.join(save_directory, "pytorch_model.bin"))

        # Save configuration
        self.config.save_pretrained(save_directory)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="microsoft/resnet-18", **kwargs):
        """Load model weights and configuration from Hugging Face Hub or local directory.

        Args:
            repo_id: Hub repository id, or the path of a local directory
                containing ``pytorch_model.bin`` and ``config.json`` (the
                layout written by ``save_pretrained``).
            model_name: Hub id of the backbone architecture to instantiate;
                it must match the architecture the weights were trained on.

        Returns:
            The model with the stored weights loaded (on CPU).
        """
        if os.path.isdir(repo_id):
            # Local directory produced by save_pretrained().
            model_path = os.path.join(repo_id, "pytorch_model.bin")
            config_path = os.path.join(repo_id, "config.json")
        else:
            # Download pytorch_model.bin and config.json from Hugging Face Hub
            model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
            config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

        # Load configuration
        config = CustomResNetConfig.from_pretrained(config_path)

        # Create the model
        model = cls(model_name=model_name, num_classes=config.num_labels)

        # SECURITY: pytorch_model.bin is a pickle file. weights_only=True
        # restricts unpickling to tensors and plain containers, so a downloaded
        # checkpoint cannot execute arbitrary code while being loaded.
        state_dict = torch.load(
            model_path,
            map_location=torch.device("cpu"),
            weights_only=True,
        )
        model.load_state_dict(state_dict)

        return model
164
-
165
-
166
class CustomEfficientNetConfig(PretrainedConfig):
    """Configuration for ``CustomEfficientNetModel``.

    Adds a single option on top of ``PretrainedConfig``: ``num_labels``, the
    number of output dimensions of the model head.
    """

    model_type = "custom-efficientnet"

    def __init__(self, num_labels=2, **kwargs):
        # The base class consumes the generic options first; the output size
        # is recorded afterwards.
        super().__init__(**kwargs)
        self.num_labels = num_labels
172
-
173
class CustomEfficientNetModel(PreTrainedModel):
    """EfficientNet image model whose classifier is replaced by a new linear head."""

    config_class = CustomEfficientNetConfig

    def __init__(self, config, model_name="google/efficientnet-b0",
                 num_classes=2, train_final_layer_only=False):
        """Build the backbone and attach a ``num_classes``-wide head.

        Args:
            config: ``CustomEfficientNetConfig`` handed to ``PreTrainedModel``.
            model_name: Hub id of the pre-trained EfficientNet checkpoint.
            num_classes: Number of outputs of the replacement classifier.
            train_final_layer_only: When true, every parameter whose name does
                not contain ``"classifier"`` is frozen.
        """
        super().__init__(config)

        # Load pre-trained EfficientNet model from Hugging Face
        self.efficientnet = AutoModelForImageClassification.from_pretrained(model_name)

        # Access the input features of the existing classifier
        in_features = self.efficientnet.classifier.in_features

        # The head stays wrapped in a Sequential: unwrapping it would rename
        # the parameters and break loading of existing checkpoints.
        self.efficientnet.classifier = nn.Sequential(
            nn.Linear(in_features, num_classes)
        )

        # Freeze previous weights if only training the final layer
        if train_final_layer_only:
            for name, param in self.efficientnet.named_parameters():
                if "classifier" not in name:
                    param.requires_grad = False
                else:
                    print(f"Unfrozen layer: {name}")

    def forward(self, x):
        """Run ``x`` through the wrapped EfficientNet model and return its output."""
        return self.efficientnet(x)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="google/efficientnet-b0", **kwargs):
        """Load model weights and configuration from Hugging Face Hub.

        Args:
            repo_id: Hub repository holding ``model.safetensors`` and
                ``config.json``.
            model_name: Hub id of the backbone architecture to instantiate.

        Returns:
            The model with the stored weights loaded.

        Raises:
            ValueError: If ``model.safetensors`` cannot be downloaded or read.
        """
        # Imported here, outside the try block: previously `load_file` was not
        # defined in this scope at all, so the NameError was swallowed below
        # and reported as a (misleading) missing-file ValueError.
        from safetensors.torch import load_file

        # Attempt to download the safetensors model file
        try:
            model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
            state_dict = load_file(model_path)
        except Exception as e:
            raise ValueError(
                f"Failed to download or load 'model.safetensors' from {repo_id}. Ensure the file exists."
            ) from e

        # Download config.json from Hugging Face Hub
        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

        # Load configuration
        config = CustomEfficientNetConfig.from_pretrained(config_path)

        # Create the model; the head size comes from the stored configuration
        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)

        # Load the state_dict into the model
        model.load_state_dict(state_dict)

        return model
227
-
228
-
229
class CustomViTConfig(PretrainedConfig):
    """Configuration for ``CustomViTModel``.

    Adds a single option on top of ``PretrainedConfig``: ``num_labels``, the
    number of output dimensions of the model head.
    """

    model_type = "custom-vit"

    def __init__(self, num_labels=2, **kwargs):
        # The base class consumes the generic options first; the output size
        # is recorded afterwards.
        super().__init__(**kwargs)
        self.num_labels = num_labels
235
-
236
class CustomViTModel(PreTrainedModel):
    """ViT image model whose classifier is replaced by a new linear head."""

    config_class = CustomViTConfig

    def __init__(self, config, model_name="google/vit-base-patch16-224",
                 num_classes=2, train_final_layer_only=False):
        """Build the backbone and attach a ``num_classes``-wide linear head.

        Args:
            config: ``CustomViTConfig`` handed to ``PreTrainedModel``.
            model_name: Hub id of the pre-trained ViT checkpoint.
            num_classes: Number of outputs of the replacement classifier.
            train_final_layer_only: When true, every parameter whose name does
                not contain ``"classifier"`` is frozen.
        """
        super().__init__(config)

        # Load pre-trained ViT model from Hugging Face
        self.vit = AutoModelForImageClassification.from_pretrained(model_name)

        # Access the input features of the existing classifier
        in_features = self.vit.classifier.in_features

        # Modify the classifier layer to match the number of output classes
        self.vit.classifier = nn.Linear(in_features, num_classes)

        # Freeze previous weights if only training the final layer
        if train_final_layer_only:
            for name, param in self.vit.named_parameters():
                if "classifier" not in name:
                    param.requires_grad = False
                else:
                    print(f"Unfrozen layer: {name}")

    def forward(self, x):
        """Run ``x`` through the wrapped ViT model and return its output."""
        return self.vit(x)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="google/vit-base-patch16-224", **kwargs):
        """Load model weights and configuration from Hugging Face Hub.

        Args:
            repo_id: Hub repository holding ``model.safetensors`` and
                ``config.json``.
            model_name: Hub id of the backbone architecture to instantiate.

        Returns:
            The model with the stored weights loaded.

        Raises:
            ValueError: If ``model.safetensors`` cannot be downloaded or read.
        """
        # Imported here, outside the try block: previously `load_file` was not
        # defined in this scope at all, so the NameError was swallowed below
        # and reported as a (misleading) missing-file ValueError.
        from safetensors.torch import load_file

        # Attempt to download the safetensors model file
        try:
            model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
            state_dict = load_file(model_path)
        except Exception as e:
            raise ValueError(
                f"Failed to download or load 'model.safetensors' from {repo_id}. Ensure the file exists."
            ) from e

        # Download config.json from Hugging Face Hub
        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

        # Load configuration
        config = CustomViTConfig.from_pretrained(config_path)

        # Create the model; the head size comes from the stored configuration
        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)

        # Load the state_dict into the model
        model.load_state_dict(state_dict)

        return model
287
-
288
-
289
- # Define the WeightedEnsembleModel class
290
class WeightedEnsembleModel(nn.Module):
    """Combine several models by a weighted sum of their outputs."""

    def __init__(self, models, weights):
        """
        Initialize the ensemble model with individual models and their weights.

        Args:
            models: Iterable of ``nn.Module`` members.
            weights: One scalar weight per member, in the same order.

        Raises:
            ValueError: If no model is given, or if the number of weights
                differs from the number of models (``zip`` would otherwise
                silently drop the surplus entries).
        """
        super(WeightedEnsembleModel, self).__init__()
        models = list(models)
        weights = list(weights)
        if not models:
            raise ValueError("WeightedEnsembleModel needs at least one model.")
        if len(models) != len(weights):
            raise ValueError(
                f"Got {len(models)} models but {len(weights)} weights; "
                "exactly one weight per model is required."
            )
        self.models = nn.ModuleList(models)  # Wrap models in ModuleList
        self.weights = weights

    def forward(self, images):
        """
        Forward pass for the ensemble model.
        Performs a weighted sum of the logits from the individual models.

        Each member may return either a tensor or an object exposing a
        ``logits`` attribute. The accumulator takes its shape from the first
        member's logits, so any output width is supported.
        """
        ensemble_logits = None
        for model, weight in zip(self.models, self.weights):
            outputs = model(images)
            logits = outputs.logits if hasattr(outputs, "logits") else outputs  # Extract logits
            if ensemble_logits is None:
                # Default-dtype accumulator on the input's device.
                ensemble_logits = torch.zeros(logits.shape, device=images.device)
            ensemble_logits += weight * logits  # Weighted sum of logits
        return ensemble_logits
310
-
311
-
312
-
313
- ```
314
-
315
-
316
- Now, load the model weights from the Hugging Face Hub.
317
- ```python
318
- from transformers import AutoModelForImageClassification
319
- import torch
320
- from sklearn.metrics import mean_absolute_error, mean_squared_error
321
- import matplotlib.pyplot as plt
322
- import numpy as np
323
-
324
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
325
- ```
326
-
327
- ```python
328
# ResNet-50
resnet = CustomResNetModel.from_pretrained(
    "final-project-5190/model-resnet-50-base",
    model_name="microsoft/resnet-50",
)

# ConvNeXt-tiny
convnext = CustomConvNeXtModel.from_pretrained(
    "final-project-5190/model-convnext-tiny-reducePlateau",
    model_name="facebook/convnext-tiny-224",
)

# ViT-base
vit = CustomViTModel.from_pretrained(
    "final-project-5190/model-ViT-base",
    model_name="google/vit-base-patch16-224",
)

# EfficientNet-b0
efficientnet = CustomEfficientNetModel.from_pretrained(
    "final-project-5190/model-efficientnet-b0-base",
    model_name="google/efficientnet-b0",
)

# Ensemble members and their weights, matched by position.
models = [convnext, resnet, vit, efficientnet]
weights = [0.28, 0.26, 0.20, 0.27]
353
- ```
354
-
355
-
356
-
357
- #### For data loading
358
- ```python
359
- # Download
360
- from datasets import load_dataset, Image
361
- ```
362
-
363
- ```python
364
- import torch
365
- import torch.nn as nn
366
- import torchvision.models as models
367
- import torchvision.transforms as transforms
368
- from torch.utils.data import DataLoader, Dataset
369
- from transformers import AutoImageProcessor, AutoModelForImageClassification, AutoConfig
370
- from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
371
- from PIL import Image
372
- import os
373
- import numpy as np
374
-
375
class GPSImageDataset(Dataset):
    """Dataset yielding ``(image, normalized GPS coordinates)`` pairs.

    Each example of ``hf_dataset`` must provide the keys ``'image'``,
    ``'Latitude'`` and ``'Longitude'``.
    """

    def __init__(self, hf_dataset, transform=None, lat_mean=None, lat_std=None, lon_mean=None, lon_std=None):
        """Store the dataset and the statistics used to normalize coordinates.

        Args:
            hf_dataset: Indexable dataset; also indexed by column name when a
                statistic has to be computed.
            transform: Optional callable applied to every image.
            lat_mean, lat_std, lon_mean, lon_std: Normalization statistics.
                Any value left as ``None`` is computed from the dataset.

        Raises:
            ValueError: If a standard deviation is zero, which would make the
                normalization divide by zero.
        """
        self.hf_dataset = hf_dataset
        self.transform = transform

        # Convert each coordinate column at most once, and only when needed.
        if lat_mean is None or lat_std is None:
            latitudes = np.array(self.hf_dataset['Latitude'])
            lat_mean = np.mean(latitudes) if lat_mean is None else lat_mean
            lat_std = np.std(latitudes) if lat_std is None else lat_std
        if lon_mean is None or lon_std is None:
            longitudes = np.array(self.hf_dataset['Longitude'])
            lon_mean = np.mean(longitudes) if lon_mean is None else lon_mean
            lon_std = np.std(longitudes) if lon_std is None else lon_std

        # Fail early rather than emitting inf/NaN targets (or a
        # ZeroDivisionError inside a DataLoader worker) later on.
        if lat_std == 0 or lon_std == 0:
            raise ValueError(
                "Latitude/longitude standard deviation must be non-zero "
                f"(got lat_std={lat_std}, lon_std={lon_std})."
            )

        self.latitude_mean = lat_mean
        self.latitude_std = lat_std
        self.longitude_mean = lon_mean
        self.longitude_std = lon_std

    def __len__(self):
        """Return the number of examples in the wrapped dataset."""
        return len(self.hf_dataset)

    def __getitem__(self, idx):
        """Return ``(image, gps_coords)`` for example ``idx``.

        ``gps_coords`` is a float32 tensor ``[latitude, longitude]`` after
        subtracting the mean and dividing by the standard deviation.
        """
        example = self.hf_dataset[idx]

        image = example['image']
        latitude = example['Latitude']
        longitude = example['Longitude']

        # Compare against None: a valid transform object may be falsy.
        if self.transform is not None:
            image = self.transform(image)

        # Normalize GPS coordinates
        latitude = (latitude - self.latitude_mean) / self.latitude_std
        longitude = (longitude - self.longitude_mean) / self.longitude_std
        gps_coords = torch.tensor([latitude, longitude], dtype=torch.float32)

        return image, gps_coords
407
- ```
408
-
409
- ```python
410
- # Dataloader + Visualize
411
# Per-channel normalization shared by both pipelines.
_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Pipeline with random augmentations.
# Augmentations left disabled: RandomRotation(degrees=15),
# GaussianBlur(kernel_size=(3, 5), sigma=(0.1, 2.0)),
# RandomPerspective(distortion_scale=0.5, p=0.5).
_augmentations = [
    transforms.RandomResizedCrop(224),   # random crop, resized to 224x224
    transforms.RandomHorizontalFlip(),   # random horizontal flip
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
]
transform = transforms.Compose(_augmentations + [transforms.ToTensor(), _normalize])

# Pipeline without augmentations, for inference.
inference_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    _normalize,
])
431
- ```
432
-
433
- Here's an example of us testing the ensemble on the released test set. To evaluate on different data, change only the line below that loads `release_data`, then run the rest of the code to obtain the RMSE.
434
-
435
- ```python
436
- # Load test data
437
- release_data = load_dataset("gydou/released_img", split="train")
438
- ```
439
-
440
- ```python
441
- # Create dataset and dataloader using training mean and std
442
# Targets are normalized with the training mean/std defined earlier, so they
# live on the same scale as the models' outputs.
_gps_stats = {
    "lat_mean": lat_mean,
    "lat_std": lat_std,
    "lon_mean": lon_mean,
    "lon_std": lon_std,
}
rel_dataset = GPSImageDataset(release_data, inference_transform, **_gps_stats)

# Fixed order (no shuffling), batches of 32.
rel_dataloader = DataLoader(rel_dataset, batch_size=32, shuffle=False)
451
- ```
452
-
453
-
454
- ```python
455
- # ensemble
456
# Build the ensemble. The members are listed explicitly because the
# data-loading cell runs `import torchvision.models as models`, which rebinds
# the name `models` to a module; passing `models` here would then hand
# nn.ModuleList a module instead of the list of networks.
ensemble_model = WeightedEnsembleModel(
    models=[convnext, resnet, vit, efficientnet],  # same order as `weights`
    weights=weights,
).to(device)


def _rmse(y_true, y_pred):
    """Return the RMSE of each output column, averaged uniformly.

    `mean_squared_error(..., squared=False)` was deprecated in scikit-learn
    1.4 and removed in 1.6; this form yields the same per-column-then-average
    value without depending on the removed argument.
    """
    per_output_mse = mean_squared_error(y_true, y_pred, multioutput="raw_values")
    return np.sqrt(per_output_mse).mean()


# De-normalization constants, built once instead of once per batch.
coord_std = torch.tensor([lat_std, lon_std])
coord_mean = torch.tensor([lat_mean, lon_mean])

# Validation
all_preds = []
all_actuals = []

ensemble_model.eval()
with torch.no_grad():
    for images, gps_coords in rel_dataloader:
        images, gps_coords = images.to(device), gps_coords.to(device)

        # Weighted ensemble prediction
        ensemble_logits = ensemble_model(images)

        # Denormalize predictions and actual values back to degrees
        preds = ensemble_logits.cpu() * coord_std + coord_mean
        actuals = gps_coords.cpu() * coord_std + coord_mean

        all_preds.append(preds)
        all_actuals.append(actuals)

# Concatenate all batches
all_preds = torch.cat(all_preds).numpy()
all_actuals = torch.cat(all_actuals).numpy()

# Compute error metrics (in degrees)
mae = mean_absolute_error(all_actuals, all_preds)
rmse = _rmse(all_actuals, all_preds)

print(f'Mean Absolute Error: {mae}')
print(f'Root Mean Squared Error: {rmse}')

# Convert predictions and actuals to meters
latitude_mean_radians = np.radians(lat_mean)  # Convert to radians for cosine
meters_per_degree_latitude = 111000  # Constant
meters_per_degree_longitude = 111000 * np.cos(latitude_mean_radians)  # Adjusted for latitude mean

all_preds_meters = all_preds.copy()
all_preds_meters[:, 0] *= meters_per_degree_latitude  # Latitude to meters
all_preds_meters[:, 1] *= meters_per_degree_longitude  # Longitude to meters

all_actuals_meters = all_actuals.copy()
all_actuals_meters[:, 0] *= meters_per_degree_latitude  # Latitude to meters
all_actuals_meters[:, 1] *= meters_per_degree_longitude  # Longitude to meters

# Compute error metrics in meters
mae_meters = mean_absolute_error(all_actuals_meters, all_preds_meters)
rmse_meters = _rmse(all_actuals_meters, all_preds_meters)

print(f"Mean Absolute Error (meters): {mae_meters:.2f}")
print(f"Root Mean Squared Error (meters): {rmse_meters:.2f}")
507
-
508
- ```
509
-
510
- After running inference on the release test set, our results are the following.
511
- - Release Dataset Mean Absolute Error: 0.0004267849560326909
512
- - Release Dataset Root Mean Squared Error: 0.0005247778631268114
513
- - Mean Absolute Error (meters): 41.90
514
- - Root Mean Squared Error (meters): 51.29