lling0212 committed on
Commit
42f6af3
·
1 Parent(s): 96fbad0

Add README.md

Browse files
Files changed (1) hide show
  1. README.md +513 -0
README.md ADDED
@@ -0,0 +1,513 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Relevant imports & set up
2
+ ```python
3
+ !pip install geopy > delete.txt
4
+ !pip install datasets > delete.txt
5
+ !pip install torch torchvision datasets > delete.txt
6
+ !pip install huggingface_hub > delete.txt
7
+ !rm delete.txt
8
+ ```
9
+
10
+ ```python
11
+ !pip install transformers
12
+ import transformers
13
+ ```
14
+
15
+ ```python
16
+ !huggingface-cli login --token [your_token]
17
+ ```
18
+
19
+ ```python
20
+ lat_mean = 39.95156937654321
21
+ lat_std = 0.0005992518588323268
22
+ lon_mean = -75.19136795987654
23
+ lon_std = 0.0007030395253318959
24
+ ```
25
+
26
+ ### Instructions
27
+ Our current best-performing model is an ensemble of multiple models. To run it on hidden test data, first run all of the model definitions below.
28
+
29
+ #### Load and define models
30
+ 1. ConvNeXt
31
+ ```python
32
+ from transformers import AutoModelForImageClassification, PretrainedConfig, PreTrainedModel
33
+ import torch
34
+ import torch.nn as nn
35
+ import os
36
+ from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
37
+
38
class CustomConvNeXtConfig(PretrainedConfig):
    """Configuration object for ``CustomConvNeXtModel``.

    Args:
        num_labels: Number of outputs of the replacement head.
        **kwargs: Passed through unchanged to ``PretrainedConfig``.
    """

    model_type = "custom-convnext"

    def __init__(self, num_labels=2, **kwargs):
        super().__init__(**kwargs)
        # Set after the base initialiser has run, exactly as the README does.
        self.num_labels = num_labels


class CustomConvNeXtModel(PreTrainedModel):
    """ConvNeXt image model whose classifier is swapped for a new linear head.

    Args:
        config: ``CustomConvNeXtConfig`` handed to ``PreTrainedModel``.
        model_name: Hub id of the pre-trained ConvNeXt checkpoint to start from.
        num_classes: Output width of the new linear head.
        train_final_layer_only: When true, every parameter whose name does not
            contain ``"classifier"`` is frozen.
    """

    config_class = CustomConvNeXtConfig

    def __init__(self, config, model_name="facebook/convnext-tiny-224",
                 num_classes=2, train_final_layer_only=False):
        super().__init__(config)

        # Start from the published checkpoint and replace only its head.
        backbone = AutoModelForImageClassification.from_pretrained(model_name)
        head_inputs = backbone.classifier.in_features
        backbone.classifier = nn.Linear(head_inputs, num_classes)
        self.convnext = backbone

        if train_final_layer_only:
            for param_name, param in self.convnext.named_parameters():
                if "classifier" in param_name:
                    # Head parameters stay trainable; report them.
                    print(f"Unfrozen layer: {param_name}")
                    continue
                param.requires_grad = False

    def forward(self, x):
        """Run ``x`` through the wrapped model and return its output as-is."""
        return self.convnext(x)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="facebook/convnext-tiny-224", **kwargs):
        """Build the model and load fine-tuned weights from a Hub repository.

        Downloads ``model.safetensors`` and ``config.json`` from ``repo_id``,
        constructs the model with ``config.num_labels`` outputs and loads the
        downloaded state dict into it. ``**kwargs`` is accepted but unused.
        """
        weights_file = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
        config_file = hf_hub_download(repo_id=repo_id, filename="config.json")

        config = CustomConvNeXtConfig.from_pretrained(config_file)
        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)

        # Imported here, as in the original, so safetensors is only needed at load time.
        from safetensors.torch import load_file
        model.load_state_dict(load_file(weights_file))

        return model
93
+
94
+ ```
95
+
96
+ 2. ResNet
97
+ ``` python
98
+ from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
99
+
100
class CustomResNetConfig(PretrainedConfig):
    """Configuration object for ``CustomResNetModel``.

    Args:
        num_labels: Number of outputs of the replacement head.
        **kwargs: Passed through unchanged to ``PretrainedConfig``.
    """

    model_type = "custom-resnet"

    def __init__(self, num_labels=2, **kwargs):
        super().__init__(**kwargs)
        self.num_labels = num_labels  # Register number of labels (output dimensions)


class CustomResNetModel(nn.Module, PyTorchModelHubMixin):
    """ResNet image model whose classifier is swapped for ``Flatten`` + ``Linear``.

    Args:
        model_name: Hub id of the pre-trained ResNet checkpoint to start from.
        num_classes: Output width of the new linear head.
        train_final_layer_only: When true, every parameter whose name does not
            contain ``"classifier"`` is frozen.
    """

    config_class = CustomResNetConfig

    def __init__(self, model_name="microsoft/resnet-18",
                 num_classes=2,
                 train_final_layer_only=False):
        super().__init__()

        # Load pre-trained ResNet model from Hugging Face
        self.resnet = AutoModelForImageClassification.from_pretrained(model_name)

        # The stock classifier is a Sequential; index 1 is its Linear layer.
        in_features = self.resnet.classifier[1].in_features

        # Replace the head so it emits the desired number of outputs.
        self.resnet.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, num_classes),
        )

        self.config = CustomResNetConfig(num_labels=num_classes)

        # Freeze everything except the new head when requested.
        if train_final_layer_only:
            for name, param in self.resnet.named_parameters():
                if "classifier" not in name:
                    param.requires_grad = False
                else:
                    print(f"Unfrozen layer: {name}")

    def forward(self, x):
        """Run ``x`` through the wrapped model and return its output as-is."""
        return self.resnet(x)

    def save_pretrained(self, save_directory, **kwargs):
        """Save model weights and custom configuration in Hugging Face format.

        Writes ``pytorch_model.bin`` (the state dict) and the configuration
        into ``save_directory``, creating the directory if needed.
        """
        os.makedirs(save_directory, exist_ok=True)

        # Save model weights
        torch.save(self.state_dict(), os.path.join(save_directory, "pytorch_model.bin"))

        # Save configuration
        self.config.save_pretrained(save_directory)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="microsoft/resnet-18", **kwargs):
        """Load model weights and configuration from Hugging Face Hub or local directory.

        Args:
            repo_id: Hub repository id, or the path of a local directory that
                contains ``pytorch_model.bin`` and ``config.json`` (for
                example one written by ``save_pretrained``).
            model_name: Hub id of the base ResNet architecture to instantiate.
            **kwargs: Accepted for signature compatibility; unused.

        Returns:
            The model on CPU with the fine-tuned weights loaded.
        """
        if os.path.isdir(repo_id):
            # Local directory: read the two files straight from disk.
            model_path = os.path.join(repo_id, "pytorch_model.bin")
            config_path = os.path.join(repo_id, "config.json")
        else:
            model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
            config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

        # Load configuration
        config = CustomResNetConfig.from_pretrained(config_path)

        # Create the model
        model = cls(model_name=model_name, num_classes=config.num_labels)

        # weights_only=True restricts unpickling to tensors and plain
        # containers, so a tampered checkpoint cannot execute arbitrary code.
        state_dict = torch.load(
            model_path,
            map_location=torch.device("cpu"),
            weights_only=True,
        )
        model.load_state_dict(state_dict)

        return model
169
+ ```
170
+
171
+ 3. EfficientNet
172
+ ``` python
173
+ from huggingface_hub import hf_hub_download
174
+ from safetensors.torch import load_file # Make sure to import this
175
+ from transformers import AutoModelForImageClassification, PreTrainedModel, PretrainedConfig
176
+ import torch.nn as nn
177
+
178
class CustomEfficientNetConfig(PretrainedConfig):
    """Configuration object for ``CustomEfficientNetModel``.

    Args:
        num_labels: Number of outputs of the replacement head.
        **kwargs: Passed through unchanged to ``PretrainedConfig``.
    """

    model_type = "custom-efficientnet"

    def __init__(self, num_labels=2, **kwargs):
        super().__init__(**kwargs)
        # Set after the base initialiser has run, exactly as the README does.
        self.num_labels = num_labels


class CustomEfficientNetModel(PreTrainedModel):
    """EfficientNet image model whose classifier is swapped for a new head.

    Args:
        config: ``CustomEfficientNetConfig`` handed to ``PreTrainedModel``.
        model_name: Hub id of the pre-trained EfficientNet checkpoint.
        num_classes: Output width of the new linear head.
        train_final_layer_only: When true, every parameter whose name does not
            contain ``"classifier"`` is frozen.
    """

    config_class = CustomEfficientNetConfig

    def __init__(self, config, model_name="google/efficientnet-b0",
                 num_classes=2, train_final_layer_only=False):
        super().__init__(config)

        backbone = AutoModelForImageClassification.from_pretrained(model_name)
        head_inputs = backbone.classifier.in_features
        # The head is a one-element Sequential, so its parameters live under
        # "classifier.0.*" in the state dict.
        backbone.classifier = nn.Sequential(nn.Linear(head_inputs, num_classes))
        self.efficientnet = backbone

        if train_final_layer_only:
            for param_name, param in self.efficientnet.named_parameters():
                if "classifier" in param_name:
                    print(f"Unfrozen layer: {param_name}")
                    continue
                param.requires_grad = False

    def forward(self, x):
        """Run ``x`` through the wrapped model and return its output as-is."""
        return self.efficientnet(x)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="google/efficientnet-b0", **kwargs):
        """Build the model and load fine-tuned weights from a Hub repository.

        Raises:
            ValueError: If ``model.safetensors`` cannot be downloaded from
                ``repo_id`` or cannot be read.
        """
        try:
            weights_file = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
            state_dict = load_file(weights_file)
        except Exception as e:
            raise ValueError(
                f"Failed to download or load 'model.safetensors' from {repo_id}. Ensure the file exists."
            ) from e

        config_file = hf_hub_download(repo_id=repo_id, filename="config.json")
        config = CustomEfficientNetConfig.from_pretrained(config_file)

        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)
        model.load_state_dict(state_dict)

        return model
239
+ ```
240
+
241
+ 4. ViT
242
+ ```python
243
+ from huggingface_hub import hf_hub_download
244
+ from safetensors.torch import load_file
245
+ from transformers import AutoModelForImageClassification, PreTrainedModel, PretrainedConfig
246
+ import torch.nn as nn
247
+
248
class CustomViTConfig(PretrainedConfig):
    """Configuration object for ``CustomViTModel``.

    Args:
        num_labels: Number of outputs of the replacement head.
        **kwargs: Passed through unchanged to ``PretrainedConfig``.
    """

    model_type = "custom-vit"

    def __init__(self, num_labels=2, **kwargs):
        super().__init__(**kwargs)
        # Set after the base initialiser has run, exactly as the README does.
        self.num_labels = num_labels


class CustomViTModel(PreTrainedModel):
    """ViT image model whose classifier is swapped for a new linear head.

    Args:
        config: ``CustomViTConfig`` handed to ``PreTrainedModel``.
        model_name: Hub id of the pre-trained ViT checkpoint to start from.
        num_classes: Output width of the new linear head.
        train_final_layer_only: When true, every parameter whose name does not
            contain ``"classifier"`` is frozen.
    """

    config_class = CustomViTConfig

    def __init__(self, config, model_name="google/vit-base-patch16-224",
                 num_classes=2, train_final_layer_only=False):
        super().__init__(config)

        backbone = AutoModelForImageClassification.from_pretrained(model_name)
        head_inputs = backbone.classifier.in_features
        backbone.classifier = nn.Linear(head_inputs, num_classes)
        self.vit = backbone

        if train_final_layer_only:
            for param_name, param in self.vit.named_parameters():
                if "classifier" in param_name:
                    print(f"Unfrozen layer: {param_name}")
                    continue
                param.requires_grad = False

    def forward(self, x):
        """Run ``x`` through the wrapped model and return its output as-is."""
        return self.vit(x)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="google/vit-base-patch16-224", **kwargs):
        """Build the model and load fine-tuned weights from a Hub repository.

        Raises:
            ValueError: If ``model.safetensors`` cannot be downloaded from
                ``repo_id`` or cannot be read.
        """
        try:
            weights_file = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
            state_dict = load_file(weights_file)
        except Exception as e:
            raise ValueError(
                f"Failed to download or load 'model.safetensors' from {repo_id}. Ensure the file exists."
            ) from e

        config_file = hf_hub_download(repo_id=repo_id, filename="config.json")
        config = CustomViTConfig.from_pretrained(config_file)

        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)
        model.load_state_dict(state_dict)

        return model
306
+ ```
307
+
308
+ Now, load the model weights from the Hugging Face Hub.
309
+ ```python
310
+ from transformers import AutoModelForImageClassification
311
+ import torch
312
+ from sklearn.metrics import mean_absolute_error, mean_squared_error
313
+ import matplotlib.pyplot as plt
314
+ import numpy as np
315
+ ```
316
+
317
+ ```python
318
+ #resnet
319
+ resnet = CustomResNetModel.from_pretrained(
320
+ "final-project-5190/model-resnet-50-base",
321
+ model_name="microsoft/resnet-50"
322
+ )
323
+ ```
324
+
325
+
326
+ ```python
327
+ #convnext
328
+ convnext=CustomConvNeXtModel.from_pretrained(
329
+ "final-project-5190/model-convnext-tiny-reducePlateau",
330
+ model_name="facebook/convnext-tiny-224")
331
+ ```
332
+
333
+ ```python
334
+ #vit
335
+ vit = CustomViTModel.from_pretrained(
336
+ "final-project-5190/model-ViT-base",
337
+ model_name="google/vit-base-patch16-224"
338
+ )
339
+ ```
340
+
341
+ ```python
342
+ #efficientnet
343
+ efficientnet = CustomEfficientNetModel.from_pretrained(
344
+ "final-project-5190/model-efficientnet-b0-base",
345
+ model_name="google/efficientnet-b0"
346
+ )
347
+ ```
348
+
349
+ #### For data loading
350
+ ```python
351
+ # Download
352
+ from datasets import load_dataset, Image
353
+ ```
354
+
355
+ ```python
356
+ import torch
357
+ import torch.nn as nn
358
+ import torchvision.models as models
359
+ import torchvision.transforms as transforms
360
+ from torch.utils.data import DataLoader, Dataset
361
+ from transformers import AutoImageProcessor, AutoModelForImageClassification, AutoConfig
362
+ from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
363
+ from PIL import Image
364
+ import os
365
+ import numpy as np
366
+
367
class GPSImageDataset(Dataset):
    """Dataset yielding ``(image, normalised_gps)`` pairs.

    Each underlying example must provide the keys ``'image'``, ``'Latitude'``
    and ``'Longitude'``. Coordinates are standardised as
    ``(value - mean) / std`` and returned as a float32 tensor
    ``[latitude, longitude]``.

    Args:
        hf_dataset: Source dataset; must support ``len()``, integer indexing
            and column access by name (``hf_dataset['Latitude']``).
        transform: Optional callable applied to the image.
        lat_mean, lat_std, lon_mean, lon_std: Normalisation statistics. Any
            value left as ``None`` is computed from the corresponding column
            of ``hf_dataset``.

    Raises:
        ValueError: If a standard deviation is exactly zero, because the
            normalisation would then divide by zero.
    """

    def __init__(self, hf_dataset, transform=None, lat_mean=None, lat_std=None, lon_mean=None, lon_std=None):
        self.hf_dataset = hf_dataset
        self.transform = transform

        self.latitude_mean, self.latitude_std = self._resolve_stats('Latitude', lat_mean, lat_std)
        self.longitude_mean, self.longitude_std = self._resolve_stats('Longitude', lon_mean, lon_std)

    def _resolve_stats(self, column, mean, std):
        """Return ``(mean, std)`` for ``column``, computing whichever is ``None``."""
        if mean is None or std is None:
            # Read the column once and reuse it for both statistics.
            values = np.array(self.hf_dataset[column])
            if mean is None:
                mean = np.mean(values)
            if std is None:
                std = np.std(values)
        if std == 0:
            raise ValueError(
                f"Standard deviation for '{column}' is zero; cannot normalise coordinates."
            )
        return mean, std

    def __len__(self):
        return len(self.hf_dataset)

    def __getitem__(self, idx):
        example = self.hf_dataset[idx]

        image = example['image']
        latitude = example['Latitude']
        longitude = example['Longitude']
        if self.transform is not None:
            image = self.transform(image)

        # Standardise the GPS coordinates with the stored statistics.
        latitude = (latitude - self.latitude_mean) / self.latitude_std
        longitude = (longitude - self.longitude_mean) / self.longitude_std
        gps_coords = torch.tensor([latitude, longitude], dtype=torch.float32)

        return image, gps_coords
399
+ ```
400
+
401
+ ```python
402
# Dataloader + Visualize

# Per-channel statistics shared by the training and inference pipelines.
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

# Training-time pipeline with augmentation. RandomRotation, GaussianBlur and
# RandomPerspective are deliberately left out (they were disabled in the
# original pipeline).
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),  # random crop, resized to 224x224
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
])

# Deterministic pipeline for inference: plain resize, no augmentation.
inference_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORMALIZE_MEAN, std=_NORMALIZE_STD),
])
423
+ ```
424
+
425
+ Here's an example of us testing the ensemble on the release test set. To evaluate on different data, change only the `release_data` loading line below and run the rest of the code to obtain the RMSE.
426
+
427
+ ```python
428
+ # Load test data
429
+ release_data = load_dataset("gydou/released_img", split="train")
430
+ ```
431
+
432
+ ```python
433
+ # Create dataset and dataloader using training mean and std
434
+ rel_dataset = GPSImageDataset(
435
+ hf_dataset=release_data,
436
+ transform=inference_transform,
437
+ lat_mean=lat_mean,
438
+ lat_std=lat_std,
439
+ lon_mean=lon_mean,
440
+ lon_std=lon_std
441
+ )
442
+ rel_dataloader = DataLoader(rel_dataset, batch_size=32, shuffle=False)
443
+ ```
444
+
445
+ ```python
446
+ models = [convnext, resnet, vit, efficientnet]
447
+ weights = [0.28, 0.26, 0.20, 0.27] # based on val 1/RMSE
448
+ ```
449
+
450
+ ```python
451
# Release

# NOTE(review): `device` and `weighted_ensemble_predict` are used by this
# script but are not defined anywhere in the visible README. The definitions
# below are a reconstruction so the cell runs on its own -- confirm them
# against the original notebook before comparing with the reported numbers.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def weighted_ensemble_predict(models, weights, images):
    """Return the weighted average of the models' outputs for ``images``.

    Weights are divided by their sum so they need not add up to exactly 1.
    Each model may return either a tensor or an object exposing ``.logits``.
    """
    total_weight = float(sum(weights))
    combined = None
    for model, weight in zip(models, weights):
        outputs = model(images)
        logits = outputs.logits if hasattr(outputs, "logits") else outputs
        term = (weight / total_weight) * logits
        combined = term if combined is None else combined + term
    return combined


def _rmse(actual, predicted):
    """Per-output RMSE averaged over outputs.

    Equivalent to ``mean_squared_error(..., squared=False)``, whose ``squared``
    argument was deprecated in scikit-learn 1.4 and removed in 1.6.
    """
    per_output_mse = mean_squared_error(actual, predicted, multioutput="raw_values")
    return np.sqrt(per_output_mse).mean()


# Initialize lists to store predictions and actual values
all_preds = []
all_actuals = []

# Move models to device and set them to evaluation mode
for model in models:
    model.to(device)
    model.eval()

# Denormalisation constants are loop-invariant, so build them once.
gps_std = torch.tensor([lat_std, lon_std])
gps_mean = torch.tensor([lat_mean, lon_mean])

# Perform inference on release dataset
with torch.no_grad():
    for images, gps_coords in rel_dataloader:
        images, gps_coords = images.to(device), gps_coords.to(device)

        # Weighted ensemble prediction
        ensemble_logits = weighted_ensemble_predict(models, weights, images)

        # Denormalize predictions and actual values back to degrees
        preds = ensemble_logits.cpu() * gps_std + gps_mean
        actuals = gps_coords.cpu() * gps_std + gps_mean

        all_preds.append(preds)
        all_actuals.append(actuals)

# Concatenate all batches
all_preds = torch.cat(all_preds).numpy()
all_actuals = torch.cat(all_actuals).numpy()

# Compute error metrics (in degrees)
mae = mean_absolute_error(all_actuals, all_preds)
rmse = _rmse(all_actuals, all_preds)

print(f'Release Dataset Mean Absolute Error: {mae}')
print(f'Release Dataset Root Mean Squared Error: {rmse}')

# Convert predictions and actuals to meters
latitude_mean_radians = np.radians(lat_mean)  # Convert to radians for cosine
meters_per_degree_latitude = 111000  # Constant
meters_per_degree_longitude = 111000 * np.cos(latitude_mean_radians)  # Adjusted for latitude mean

all_preds_meters = all_preds.copy()
all_preds_meters[:, 0] *= meters_per_degree_latitude  # Latitude to meters
all_preds_meters[:, 1] *= meters_per_degree_longitude  # Longitude to meters

all_actuals_meters = all_actuals.copy()
all_actuals_meters[:, 0] *= meters_per_degree_latitude  # Latitude to meters
all_actuals_meters[:, 1] *= meters_per_degree_longitude  # Longitude to meters

# Compute error metrics in meters
mae_meters = mean_absolute_error(all_actuals_meters, all_preds_meters)
rmse_meters = _rmse(all_actuals_meters, all_preds_meters)

print(f"Mean Absolute Error (meters): {mae_meters:.2f}")
print(f"Root Mean Squared Error (meters): {rmse_meters:.2f}")
507
+ ```
508
+
509
+ After running inference on the release test set, our results are as follows:
510
+ - Release Dataset Mean Absolute Error: 0.0004267849560326909
511
+ - Release Dataset Root Mean Squared Error: 0.0005247778631268114
512
+ - Mean Absolute Error (meters): 41.90
513
+ - Root Mean Squared Error (meters): 51.29