Update README
Browse files
README.md
CHANGED
@@ -24,10 +24,10 @@ lon_std = 0.0007030395253318959
|
|
24 |
```
|
25 |
|
26 |
### Instructions
|
27 |
-
Our current best performing model is an ensemble of multiple models. To run it on hidden test data, first run
|
28 |
|
29 |
#### Load and define models
|
30 |
-
|
31 |
```python
|
32 |
from transformers import AutoModelForImageClassification, PretrainedConfig, PreTrainedModel
|
33 |
import torch
|
@@ -91,11 +91,6 @@ class CustomConvNeXtModel(PreTrainedModel):
|
|
91 |
|
92 |
return model
|
93 |
|
94 |
-
```
|
95 |
-
|
96 |
-
2. ResNet
|
97 |
-
``` python
|
98 |
-
from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
|
99 |
|
100 |
class CustomResNetConfig(PretrainedConfig):
|
101 |
model_type = "custom-resnet"
|
@@ -166,14 +161,7 @@ class CustomResNetModel(nn.Module, PyTorchModelHubMixin):
|
|
166 |
model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
|
167 |
|
168 |
return model
|
169 |
-
```
|
170 |
|
171 |
-
3. EfficientNet
|
172 |
-
``` python
|
173 |
-
from huggingface_hub import hf_hub_download
|
174 |
-
from safetensors.torch import load_file # Make sure to import this
|
175 |
-
from transformers import AutoModelForImageClassification, PreTrainedModel, PretrainedConfig
|
176 |
-
import torch.nn as nn
|
177 |
|
178 |
class CustomEfficientNetConfig(PretrainedConfig):
|
179 |
model_type = "custom-efficientnet"
|
@@ -236,14 +224,7 @@ class CustomEfficientNetModel(PreTrainedModel):
|
|
236 |
model.load_state_dict(state_dict)
|
237 |
|
238 |
return model
|
239 |
-
```
|
240 |
|
241 |
-
4. ViT
|
242 |
-
```python
|
243 |
-
from huggingface_hub import hf_hub_download
|
244 |
-
from safetensors.torch import load_file
|
245 |
-
from transformers import AutoModelForImageClassification, PreTrainedModel, PretrainedConfig
|
246 |
-
import torch.nn as nn
|
247 |
|
248 |
class CustomViTConfig(PretrainedConfig):
|
249 |
model_type = "custom-vit"
|
@@ -303,8 +284,35 @@ class CustomViTModel(PreTrainedModel):
|
|
303 |
model.load_state_dict(state_dict)
|
304 |
|
305 |
return model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
306 |
```
|
307 |
|
|
|
308 |
Now, load the model weights from Hugging Face.
|
309 |
```python
|
310 |
from transformers import AutoModelForImageClassification
|
@@ -312,6 +320,8 @@ import torch
|
|
312 |
from sklearn.metrics import mean_absolute_error, mean_squared_error
|
313 |
import matplotlib.pyplot as plt
|
314 |
import numpy as np
|
|
|
|
|
315 |
```
|
316 |
|
317 |
```python
|
@@ -320,32 +330,30 @@ resnet = CustomResNetModel.from_pretrained(
|
|
320 |
"final-project-5190/model-resnet-50-base",
|
321 |
model_name="microsoft/resnet-50"
|
322 |
)
|
323 |
-
```
|
324 |
|
325 |
-
|
326 |
-
```python
|
327 |
#convnext
|
328 |
convnext=CustomConvNeXtModel.from_pretrained(
|
329 |
"final-project-5190/model-convnext-tiny-reducePlateau",
|
330 |
model_name="facebook/convnext-tiny-224")
|
331 |
-
```
|
332 |
|
333 |
-
```python
|
334 |
#vit
|
335 |
vit = CustomViTModel.from_pretrained(
|
336 |
"final-project-5190/model-ViT-base",
|
337 |
model_name="google/vit-base-patch16-224"
|
338 |
)
|
339 |
-
```
|
340 |
|
341 |
-
```python
|
342 |
#efficientnet
|
343 |
efficientnet = CustomEfficientNetModel.from_pretrained(
|
344 |
"final-project-5190/model-efficientnet-b0-base",
|
345 |
model_name="google/efficientnet-b0"
|
346 |
)
|
|
|
|
|
|
|
347 |
```
|
348 |
|
|
|
|
|
349 |
#### For data loading
|
350 |
```python
|
351 |
# Download
|
@@ -422,34 +430,6 @@ inference_transform = transforms.Compose([
|
|
422 |
])
|
423 |
```
|
424 |
|
425 |
-
### Ensemble
|
426 |
-
Define Ensemble (weighted average) and prepare model
|
427 |
-
```python
|
428 |
-
models = [convnext, resnet, vit, efficientnet]
|
429 |
-
weights = [0.28, 0.26, 0.20, 0.27] # based on val 1/RMSE
|
430 |
-
```
|
431 |
-
|
432 |
-
```python
|
433 |
-
# Weighted ensemble prediction function
|
434 |
-
def weighted_ensemble_predict(models, weights, images):
    """
    Generate ensemble predictions as the weighted sum of each model's logits.

    Args:
        models: Iterable of callables. Each is called as ``model(images)`` and
            must return either a tensor or an object with a ``.logits``
            attribute.
        weights: Iterable of scalar weights, one per model, in the same order.
        images: Input batch tensor; ``images.size(0)`` is used as the batch
            size and ``images.device`` as the device of the result.

    Returns:
        Tensor of shape ``(images.size(0), 2)`` holding ``sum(w_i * logits_i)``.
        No normalization is applied: the result is a true weighted *average*
        only if the caller supplies weights that sum to 1.

    Raises:
        ValueError: If the number of models and weights differ.
    """
    models = list(models)
    weights = list(weights)
    # zip() would silently drop the unmatched tail, so a missing weight would
    # quietly remove a model from the ensemble. Fail loudly instead.
    if len(models) != len(weights):
        raise ValueError(
            f"Expected one weight per model, got {len(models)} models "
            f"and {len(weights)} weights"
        )

    # Allocate the accumulator directly on the input's device rather than
    # creating it on the CPU and copying it over.
    ensemble_logits = torch.zeros((images.size(0), 2), device=images.device)
    for model, weight in zip(models, weights):
        outputs = model(images)
        # Hugging Face style outputs wrap the tensor in `.logits`; plain
        # modules return the tensor itself.
        logits = outputs.logits if hasattr(outputs, "logits") else outputs
        ensemble_logits += weight * logits
    return ensemble_logits
|
444 |
-
|
445 |
-
|
446 |
-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
447 |
-
for model in models:
|
448 |
-
model.to(device)
|
449 |
-
model.eval()
|
450 |
-
|
451 |
-
```
|
452 |
-
|
453 |
Here's an example of us testing the ensemble on the release test set. You can just change the line below that loads release_data and run the rest of the code to obtain the RMSE.
|
454 |
|
455 |
```python
|
@@ -472,24 +452,20 @@ rel_dataloader = DataLoader(rel_dataset, batch_size=32, shuffle=False)
|
|
472 |
|
473 |
|
474 |
```python
|
475 |
-
#
|
|
|
476 |
|
477 |
-
#
|
478 |
all_preds = []
|
479 |
all_actuals = []
|
480 |
|
481 |
-
|
482 |
-
for model in models:
|
483 |
-
model.to(device)
|
484 |
-
model.eval()
|
485 |
-
|
486 |
-
# Perform inference on release dataset
|
487 |
with torch.no_grad():
|
488 |
for images, gps_coords in rel_dataloader:
|
489 |
images, gps_coords = images.to(device), gps_coords.to(device)
|
490 |
|
491 |
-
# Weighted ensemble prediction
|
492 |
-
ensemble_logits =
|
493 |
|
494 |
# Denormalize predictions and actual values
|
495 |
preds = ensemble_logits.cpu() * torch.tensor([lat_std, lon_std]) + torch.tensor([lat_mean, lon_mean])
|
@@ -506,8 +482,8 @@ all_actuals = torch.cat(all_actuals).numpy()
|
|
506 |
mae = mean_absolute_error(all_actuals, all_preds)
|
507 |
rmse = mean_squared_error(all_actuals, all_preds, squared=False)
|
508 |
|
509 |
-
print(f'
|
510 |
-
print(f'
|
511 |
|
512 |
# Convert predictions and actuals to meters
|
513 |
latitude_mean_radians = np.radians(lat_mean) # Convert to radians for cosine
|
@@ -528,6 +504,7 @@ rmse_meters = mean_squared_error(all_actuals_meters, all_preds_meters, squared=F
|
|
528 |
|
529 |
print(f"Mean Absolute Error (meters): {mae_meters:.2f}")
|
530 |
print(f"Root Mean Squared Error (meters): {rmse_meters:.2f}")
|
|
|
531 |
```
|
532 |
|
533 |
After running inference on the release test set, our results are the following.
|
|
|
24 |
```
|
25 |
|
26 |
### Instructions
|
27 |
+
Our current best performing model is an ensemble of multiple models. To run it on hidden test data, first run the model definitions.
|
28 |
|
29 |
#### Load and define models
|
30 |
+
|
31 |
```python
|
32 |
from transformers import AutoModelForImageClassification, PretrainedConfig, PreTrainedModel
|
33 |
import torch
|
|
|
91 |
|
92 |
return model
|
93 |
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
class CustomResNetConfig(PretrainedConfig):
|
96 |
model_type = "custom-resnet"
|
|
|
161 |
model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
|
162 |
|
163 |
return model
|
|
|
164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
|
166 |
class CustomEfficientNetConfig(PretrainedConfig):
|
167 |
model_type = "custom-efficientnet"
|
|
|
224 |
model.load_state_dict(state_dict)
|
225 |
|
226 |
return model
|
|
|
227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
|
229 |
class CustomViTConfig(PretrainedConfig):
|
230 |
model_type = "custom-vit"
|
|
|
284 |
model.load_state_dict(state_dict)
|
285 |
|
286 |
return model
|
287 |
+
|
288 |
+
|
289 |
+
# Define the WeightedEnsembleModel class
|
290 |
+
class WeightedEnsembleModel(nn.Module):
    """
    Ensemble module whose output is the weighted sum of its members' logits.

    Each member is called on the same input batch. A member may return either
    a tensor or an object exposing a ``.logits`` attribute. No normalization
    is applied to the weights, so the output is a true weighted *average* only
    if the supplied weights sum to 1.
    """

    def __init__(self, models, weights):
        """
        Initialize the ensemble model with individual models and their weights.

        Args:
            models: Iterable of ``nn.Module`` instances to combine.
            weights: Scalar weights, one per model, in the same order.

        Raises:
            ValueError: If the number of models and weights differ.
        """
        super().__init__()
        # ModuleList registers the members as submodules, so .to(), .eval()
        # and friends called on the ensemble reach every member.
        self.models = nn.ModuleList(models)
        # A one-shot iterator would be exhausted after the first forward()
        # call; materialize it. Sized containers are stored unchanged.
        if not hasattr(weights, "__len__"):
            weights = list(weights)
        # zip() in forward() would silently drop the unmatched tail, so a
        # missing weight would quietly remove a model from the ensemble.
        if len(self.models) != len(weights):
            raise ValueError(
                f"Expected one weight per model, got {len(self.models)} models "
                f"and {len(weights)} weights"
            )
        self.weights = weights

    def forward(self, images):
        """
        Forward pass for the ensemble model.

        Args:
            images: Input batch tensor; ``images.size(0)`` is used as the
                batch size and ``images.device`` as the device of the result.

        Returns:
            Tensor of shape ``(images.size(0), 2)`` holding
            ``sum(w_i * logits_i)`` over all member models.
        """
        # Allocate the accumulator directly on the input's device rather than
        # creating it on the CPU and copying it over.
        ensemble_logits = torch.zeros((images.size(0), 2), device=images.device)
        for model, weight in zip(self.models, self.weights):
            outputs = model(images)
            # Hugging Face style outputs wrap the tensor in `.logits`; plain
            # modules return the tensor itself.
            logits = outputs.logits if hasattr(outputs, "logits") else outputs
            ensemble_logits += weight * logits
        return ensemble_logits
|
310 |
+
|
311 |
+
|
312 |
+
|
313 |
```
|
314 |
|
315 |
+
|
316 |
Now, load the model weights from Hugging Face.
|
317 |
```python
|
318 |
from transformers import AutoModelForImageClassification
|
|
|
320 |
from sklearn.metrics import mean_absolute_error, mean_squared_error
|
321 |
import matplotlib.pyplot as plt
|
322 |
import numpy as np
|
323 |
+
|
324 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
325 |
```
|
326 |
|
327 |
```python
|
|
|
330 |
"final-project-5190/model-resnet-50-base",
|
331 |
model_name="microsoft/resnet-50"
|
332 |
)
|
|
|
333 |
|
|
|
|
|
334 |
#convnext
|
335 |
convnext=CustomConvNeXtModel.from_pretrained(
|
336 |
"final-project-5190/model-convnext-tiny-reducePlateau",
|
337 |
model_name="facebook/convnext-tiny-224")
|
|
|
338 |
|
|
|
339 |
#vit
|
340 |
vit = CustomViTModel.from_pretrained(
|
341 |
"final-project-5190/model-ViT-base",
|
342 |
model_name="google/vit-base-patch16-224"
|
343 |
)
|
|
|
344 |
|
|
|
345 |
#efficientnet
|
346 |
efficientnet = CustomEfficientNetModel.from_pretrained(
|
347 |
"final-project-5190/model-efficientnet-b0-base",
|
348 |
model_name="google/efficientnet-b0"
|
349 |
)
|
350 |
+
|
351 |
+
models = [convnext, resnet, vit, efficientnet]
|
352 |
+
weights = [0.28, 0.26, 0.20, 0.27]
|
353 |
```
|
354 |
|
355 |
+
|
356 |
+
|
357 |
#### For data loading
|
358 |
```python
|
359 |
# Download
|
|
|
430 |
])
|
431 |
```
|
432 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
433 |
Here's an example of us testing the ensemble on the release test set. You can just change the line below that loads release_data and run the rest of the code to obtain the RMSE.
|
434 |
|
435 |
```python
|
|
|
452 |
|
453 |
|
454 |
```python
|
455 |
+
# ensemble
|
456 |
+
ensemble_model = WeightedEnsembleModel(models=models, weights=weights).to(device)
|
457 |
|
458 |
+
# Validation
|
459 |
all_preds = []
|
460 |
all_actuals = []
|
461 |
|
462 |
+
ensemble_model.eval()
|
|
|
|
|
|
|
|
|
|
|
463 |
with torch.no_grad():
|
464 |
for images, gps_coords in rel_dataloader:
|
465 |
images, gps_coords = images.to(device), gps_coords.to(device)
|
466 |
|
467 |
+
# Weighted ensemble prediction using the new model
|
468 |
+
ensemble_logits = ensemble_model(images)
|
469 |
|
470 |
# Denormalize predictions and actual values
|
471 |
preds = ensemble_logits.cpu() * torch.tensor([lat_std, lon_std]) + torch.tensor([lat_mean, lon_mean])
|
|
|
482 |
mae = mean_absolute_error(all_actuals, all_preds)
|
483 |
rmse = mean_squared_error(all_actuals, all_preds, squared=False)
|
484 |
|
485 |
+
print(f'Mean Absolute Error: {mae}')
|
486 |
+
print(f'Root Mean Squared Error: {rmse}')
|
487 |
|
488 |
# Convert predictions and actuals to meters
|
489 |
latitude_mean_radians = np.radians(lat_mean) # Convert to radians for cosine
|
|
|
504 |
|
505 |
print(f"Mean Absolute Error (meters): {mae_meters:.2f}")
|
506 |
print(f"Root Mean Squared Error (meters): {rmse_meters:.2f}")
|
507 |
+
|
508 |
```
|
509 |
|
510 |
After running inference on the release test set, our results are the following.
|