### Relevant imports & setup

```python
# Redirect pip output to a scratch file to keep the notebook output quiet, then remove it
!pip install geopy > delete.txt
!pip install datasets > delete.txt
!pip install torch torchvision datasets > delete.txt
!pip install huggingface_hub > delete.txt
!rm delete.txt
```

```python
!pip install transformers
import transformers
```

```python
!huggingface-cli login --token [your_token]
```
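
If you would rather authenticate from Python than from the shell, a minimal equivalent using `huggingface_hub` (replace the placeholder with your actual token):

```python
from huggingface_hub import login

login(token="[your_token]")  # same effect as the CLI login above
```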

```python
# Training-split statistics used to normalize/denormalize GPS coordinates
lat_mean = 39.95156937654321
lat_std = 0.0005992518588323268
lon_mean = -75.19136795987654
lon_std = 0.0007030395253318959
```
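
Predictions come out of the models in normalized units and are mapped back with the inverse transform. As a minimal illustration (the coordinate pair below is made up for this sketch):

```python
# Hypothetical coordinate pair near the training mean (illustration only)
lat, lon = 39.9516, -75.1914

# Forward: z-score with the training statistics (what GPSImageDataset does below)
lat_norm = (lat - lat_mean) / lat_std
lon_norm = (lon - lon_mean) / lon_std

# Inverse: denormalize (what the evaluation loop does to model outputs)
lat_back = lat_norm * lat_std + lat_mean  # recovers the original latitude
lon_back = lon_norm * lon_std + lon_mean  # recovers the original longitude
```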

### Instructions
Our current best-performing model is a weighted ensemble of four fine-tuned backbones (ConvNeXt, ResNet, ViT, and EfficientNet); the final prediction is the weighted sum of their outputs. To run it on hidden test data, first run the model definitions below.

#### Load and define models

```python
from transformers import AutoModelForImageClassification, PretrainedConfig, PreTrainedModel
import torch
import torch.nn as nn
import os
from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
from safetensors.torch import load_file  # used by the safetensors loaders below

class CustomConvNeXtConfig(PretrainedConfig):
    model_type = "custom-convnext"

    def __init__(self, num_labels=2, **kwargs):
        super().__init__(**kwargs)
        self.num_labels = num_labels  # Register number of labels (output dimensions)

class CustomConvNeXtModel(PreTrainedModel):
    config_class = CustomConvNeXtConfig

    def __init__(self, config, model_name="facebook/convnext-tiny-224",
                 num_classes=2, train_final_layer_only=False):
        super().__init__(config)

        # Load pre-trained ConvNeXt model from Hugging Face
        self.convnext = AutoModelForImageClassification.from_pretrained(model_name)

        # Access the input features of the existing classifier
        in_features = self.convnext.classifier.in_features

        # Modify the classifier layer to match the number of output classes
        self.convnext.classifier = nn.Linear(in_features, num_classes)

        # Freeze earlier weights if only training the final layer
        if train_final_layer_only:
            for name, param in self.convnext.named_parameters():
                if "classifier" not in name:
                    param.requires_grad = False
                else:
                    print(f"Unfrozen layer: {name}")

    def forward(self, x):
        return self.convnext(x)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="facebook/convnext-tiny-224", **kwargs):
        """Load model weights and configuration from the Hugging Face Hub."""
        # Download model.safetensors and config.json from the Hub
        model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

        # Load configuration
        config = CustomConvNeXtConfig.from_pretrained(config_path)

        # Create the model
        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)

        # Load state_dict from the safetensors file
        state_dict = load_file(model_path)
        model.load_state_dict(state_dict)

        return model


class CustomResNetConfig(PretrainedConfig):
    model_type = "custom-resnet"

    def __init__(self, num_labels=2, **kwargs):
        super().__init__(**kwargs)
        self.num_labels = num_labels  # Register number of labels (output dimensions)

class CustomResNetModel(nn.Module, PyTorchModelHubMixin):
    config_class = CustomResNetConfig

    def __init__(self, model_name="microsoft/resnet-18",
                 num_classes=2,
                 train_final_layer_only=False):
        super().__init__()

        # Load pre-trained ResNet model from Hugging Face
        self.resnet = AutoModelForImageClassification.from_pretrained(model_name)

        # Access the Linear layer inside the Sequential classifier
        in_features = self.resnet.classifier[1].in_features

        # Replace the classifier to match the desired number of output classes
        self.resnet.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, num_classes)
        )

        self.config = CustomResNetConfig(num_labels=num_classes)

        # Freeze earlier weights if only training the final layer
        if train_final_layer_only:
            for name, param in self.resnet.named_parameters():
                if "classifier" not in name:
                    param.requires_grad = False
                else:
                    print(f"Unfrozen layer: {name}")

    def forward(self, x):
        return self.resnet(x)

    def save_pretrained(self, save_directory, **kwargs):
        """Save model weights and custom configuration in Hugging Face format."""
        os.makedirs(save_directory, exist_ok=True)

        # Save model weights
        torch.save(self.state_dict(), os.path.join(save_directory, "pytorch_model.bin"))

        # Save configuration
        self.config.save_pretrained(save_directory)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="microsoft/resnet-18", **kwargs):
        """Load model weights and configuration from the Hugging Face Hub."""
        # Download pytorch_model.bin and config.json from the Hub
        model_path = hf_hub_download(repo_id=repo_id, filename="pytorch_model.bin")
        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

        # Load configuration
        config = CustomResNetConfig.from_pretrained(config_path)

        # Create the model
        model = cls(model_name=model_name, num_classes=config.num_labels)

        # Load state_dict
        model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))

        return model


class CustomEfficientNetConfig(PretrainedConfig):
    model_type = "custom-efficientnet"

    def __init__(self, num_labels=2, **kwargs):
        super().__init__(**kwargs)
        self.num_labels = num_labels  # Register number of labels (output dimensions)

class CustomEfficientNetModel(PreTrainedModel):
    config_class = CustomEfficientNetConfig

    def __init__(self, config, model_name="google/efficientnet-b0",
                 num_classes=2, train_final_layer_only=False):
        super().__init__(config)

        # Load pre-trained EfficientNet model from Hugging Face
        self.efficientnet = AutoModelForImageClassification.from_pretrained(model_name)

        # Access the input features of the existing classifier
        in_features = self.efficientnet.classifier.in_features

        # Modify the classifier layer to match the number of output classes
        self.efficientnet.classifier = nn.Sequential(
            nn.Linear(in_features, num_classes)
        )

        # Freeze earlier weights if only training the final layer
        if train_final_layer_only:
            for name, param in self.efficientnet.named_parameters():
                if "classifier" not in name:
                    param.requires_grad = False
                else:
                    print(f"Unfrozen layer: {name}")

    def forward(self, x):
        return self.efficientnet(x)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="google/efficientnet-b0", **kwargs):
        """Load model weights and configuration from the Hugging Face Hub."""
        # Attempt to download the safetensors model file
        try:
            model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
            state_dict = load_file(model_path)
        except Exception as e:
            raise ValueError(
                f"Failed to download or load 'model.safetensors' from {repo_id}. Ensure the file exists."
            ) from e

        # Download config.json from the Hub
        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

        # Load configuration
        config = CustomEfficientNetConfig.from_pretrained(config_path)

        # Create the model
        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)

        # Load the state_dict into the model
        model.load_state_dict(state_dict)

        return model


class CustomViTConfig(PretrainedConfig):
    model_type = "custom-vit"

    def __init__(self, num_labels=2, **kwargs):
        super().__init__(**kwargs)
        self.num_labels = num_labels  # Register number of labels (output dimensions)

class CustomViTModel(PreTrainedModel):
    config_class = CustomViTConfig

    def __init__(self, config, model_name="google/vit-base-patch16-224",
                 num_classes=2, train_final_layer_only=False):
        super().__init__(config)

        # Load pre-trained ViT model from Hugging Face
        self.vit = AutoModelForImageClassification.from_pretrained(model_name)

        # Access the input features of the existing classifier
        in_features = self.vit.classifier.in_features

        # Modify the classifier layer to match the number of output classes
        self.vit.classifier = nn.Linear(in_features, num_classes)

        # Freeze earlier weights if only training the final layer
        if train_final_layer_only:
            for name, param in self.vit.named_parameters():
                if "classifier" not in name:
                    param.requires_grad = False
                else:
                    print(f"Unfrozen layer: {name}")

    def forward(self, x):
        return self.vit(x)

    @classmethod
    def from_pretrained(cls, repo_id, model_name="google/vit-base-patch16-224", **kwargs):
        """Load model weights and configuration from the Hugging Face Hub."""
        # Attempt to download the safetensors model file
        try:
            model_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors")
            state_dict = load_file(model_path)
        except Exception as e:
            raise ValueError(
                f"Failed to download or load 'model.safetensors' from {repo_id}. Ensure the file exists."
            ) from e

        # Download config.json from the Hub
        config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

        # Load configuration
        config = CustomViTConfig.from_pretrained(config_path)

        # Create the model
        model = cls(config=config, model_name=model_name, num_classes=config.num_labels)

        # Load the state_dict into the model
        model.load_state_dict(state_dict)

        return model


# Weighted ensemble over the individual models
class WeightedEnsembleModel(nn.Module):
    def __init__(self, models, weights):
        """Initialize the ensemble with the individual models and their weights."""
        super().__init__()
        self.models = nn.ModuleList(models)  # Wrap models in a ModuleList
        self.weights = weights

    def forward(self, images):
        """
        Forward pass for the ensemble model.
        Returns the weighted sum of the logits from the individual models.
        """
        ensemble_logits = torch.zeros((images.size(0), 2)).to(images.device)  # Initialize logits
        for model, weight in zip(self.models, self.weights):
            outputs = model(images)
            logits = outputs.logits if hasattr(outputs, "logits") else outputs  # Extract logits
            ensemble_logits += weight * logits  # Weighted sum of logits
        return ensemble_logits
```

Now, load the model weights from the Hugging Face Hub.
```python
from transformers import AutoModelForImageClassification
import torch
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
```

```python
# ResNet
resnet = CustomResNetModel.from_pretrained(
    "final-project-5190/model-resnet-50-base",
    model_name="microsoft/resnet-50"
)

# ConvNeXt
convnext = CustomConvNeXtModel.from_pretrained(
    "final-project-5190/model-convnext-tiny-reducePlateau",
    model_name="facebook/convnext-tiny-224"
)

# ViT
vit = CustomViTModel.from_pretrained(
    "final-project-5190/model-ViT-base",
    model_name="google/vit-base-patch16-224"
)

# EfficientNet
efficientnet = CustomEfficientNetModel.from_pretrained(
    "final-project-5190/model-efficientnet-b0-base",
    model_name="google/efficientnet-b0"
)

models = [convnext, resnet, vit, efficientnet]
weights = [0.28, 0.26, 0.20, 0.27]
```
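
As a quick sanity check (our suggestion, not part of the original pipeline), you can push a random batch through the ensemble and confirm it emits one (latitude, longitude) pair per image; all four backbones expect 224x224 inputs:

```python
# Sketch: verify the ensemble wiring on a fake batch before loading real data
sanity_ensemble = WeightedEnsembleModel(models=models, weights=weights)
sanity_ensemble.eval()
with torch.no_grad():
    dummy = torch.randn(2, 3, 224, 224)  # two random "images"
    out = sanity_ensemble(dummy)
print(out.shape)  # expected: torch.Size([2, 2])
```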

#### For data loading
```python
# Download
from datasets import load_dataset, Image
```

```python
import torch
import torch.nn as nn
import torchvision.transforms as transforms
# (note: avoid `import torchvision.models as models` here; it would shadow the `models` list above)
from torch.utils.data import DataLoader, Dataset
from transformers import AutoImageProcessor, AutoModelForImageClassification, AutoConfig
from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
from PIL import Image
import os
import numpy as np

class GPSImageDataset(Dataset):
    def __init__(self, hf_dataset, transform=None, lat_mean=None, lat_std=None, lon_mean=None, lon_std=None):
        self.hf_dataset = hf_dataset
        self.transform = transform

        # Compute mean and std from the dataset if not provided
        self.latitude_mean = lat_mean if lat_mean is not None else np.mean(np.array(self.hf_dataset['Latitude']))
        self.latitude_std = lat_std if lat_std is not None else np.std(np.array(self.hf_dataset['Latitude']))
        self.longitude_mean = lon_mean if lon_mean is not None else np.mean(np.array(self.hf_dataset['Longitude']))
        self.longitude_std = lon_std if lon_std is not None else np.std(np.array(self.hf_dataset['Longitude']))

    def __len__(self):
        return len(self.hf_dataset)

    def __getitem__(self, idx):
        # Extract one example
        example = self.hf_dataset[idx]

        # Load and process the image
        image = example['image']
        latitude = example['Latitude']
        longitude = example['Longitude']
        # image = image.rotate(-90, expand=True)
        if self.transform:
            image = self.transform(image)

        # Normalize GPS coordinates
        latitude = (latitude - self.latitude_mean) / self.latitude_std
        longitude = (longitude - self.longitude_mean) / self.longitude_std
        gps_coords = torch.tensor([latitude, longitude], dtype=torch.float32)

        return image, gps_coords
```

```python
# Dataloader transforms (training augmentations + inference)
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),  # Random crop and resize to 224x224
    transforms.RandomHorizontalFlip(),  # Random horizontal flip
    # transforms.RandomRotation(degrees=15),  # Random rotation between -15 and 15 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Random color jitter
    # transforms.GaussianBlur(kernel_size=(3, 5), sigma=(0.1, 2.0)),
    # transforms.RandomPerspective(distortion_scale=0.5, p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Optionally, create a separate transform for inference without augmentations
inference_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
```

Here's an example of us testing the ensemble on the release test set. Just change the line that loads `release_data` below and run the rest of the code to obtain the RMSE.

```python
# Load test data
release_data = load_dataset("gydou/released_img", split="train")
```

```python
# Create the dataset and dataloader using the training mean and std
rel_dataset = GPSImageDataset(
    hf_dataset=release_data,
    transform=inference_transform,
    lat_mean=lat_mean,
    lat_std=lat_std,
    lon_mean=lon_mean,
    lon_std=lon_std
)
rel_dataloader = DataLoader(rel_dataset, batch_size=32, shuffle=False)
```
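
Optionally (a quick check we suggest, not part of the original pipeline), inspect one batch to confirm the shapes before running the full evaluation loop:

```python
# Peek at the first batch: images and normalized (lat, lon) targets
images, gps_coords = next(iter(rel_dataloader))
print(images.shape)      # e.g. torch.Size([32, 3, 224, 224])
print(gps_coords.shape)  # e.g. torch.Size([32, 2])
```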

```python
# Build the weighted ensemble and move it to the device
ensemble_model = WeightedEnsembleModel(models=models, weights=weights).to(device)

# Validation
all_preds = []
all_actuals = []

ensemble_model.eval()
with torch.no_grad():
    for images, gps_coords in rel_dataloader:
        images, gps_coords = images.to(device), gps_coords.to(device)

        # Weighted ensemble prediction
        ensemble_logits = ensemble_model(images)

        # Denormalize predictions and actual values
        preds = ensemble_logits.cpu() * torch.tensor([lat_std, lon_std]) + torch.tensor([lat_mean, lon_mean])
        actuals = gps_coords.cpu() * torch.tensor([lat_std, lon_std]) + torch.tensor([lat_mean, lon_mean])

        all_preds.append(preds)
        all_actuals.append(actuals)

# Concatenate all batches
all_preds = torch.cat(all_preds).numpy()
all_actuals = torch.cat(all_actuals).numpy()

# Compute error metrics in degrees
mae = mean_absolute_error(all_actuals, all_preds)
rmse = mean_squared_error(all_actuals, all_preds, squared=False)

print(f'Mean Absolute Error: {mae}')
print(f'Root Mean Squared Error: {rmse}')

# Convert predictions and actuals to meters
latitude_mean_radians = np.radians(lat_mean)  # Convert to radians for the cosine
meters_per_degree_latitude = 111000  # Roughly constant everywhere
meters_per_degree_longitude = 111000 * np.cos(latitude_mean_radians)  # Shrinks with latitude

all_preds_meters = all_preds.copy()
all_preds_meters[:, 0] *= meters_per_degree_latitude   # Latitude to meters
all_preds_meters[:, 1] *= meters_per_degree_longitude  # Longitude to meters

all_actuals_meters = all_actuals.copy()
all_actuals_meters[:, 0] *= meters_per_degree_latitude   # Latitude to meters
all_actuals_meters[:, 1] *= meters_per_degree_longitude  # Longitude to meters

# Compute error metrics in meters
mae_meters = mean_absolute_error(all_actuals_meters, all_preds_meters)
rmse_meters = mean_squared_error(all_actuals_meters, all_preds_meters, squared=False)

print(f"Mean Absolute Error (meters): {mae_meters:.2f}")
print(f"Root Mean Squared Error (meters): {rmse_meters:.2f}")
```

After running inference on the release test set, our results are as follows:
- Release Dataset Mean Absolute Error: 0.0004267849560326909
- Release Dataset Root Mean Squared Error: 0.0005247778631268114
- Mean Absolute Error (meters): 41.90
- Root Mean Squared Error (meters): 51.29

For scale: at a mean latitude of about 39.95°, one degree corresponds to roughly 111 km of latitude and about 85 km of longitude (111 km x cos 39.95°), which is how an RMSE of ~0.0005 degrees translates to roughly 50 meters.