|
{ |
|
"best_metric": 1.0, |
|
"best_model_checkpoint": "vit-large-patch16-224-in21k-dungeon-geo-morphs-denoised-04Dec24-003/checkpoint-60", |
|
"epoch": 32.0, |
|
"eval_steps": 10, |
|
"global_step": 80, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 15.498607635498047, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5126, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6383838383838384, |
|
"eval_loss": 1.33207368850708, |
|
"eval_runtime": 6.5954, |
|
"eval_samples_per_second": 75.052, |
|
"eval_steps_per_second": 9.4, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 16.96432113647461, |
|
"learning_rate": 1.7222222222222224e-05, |
|
"loss": 1.049, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8444444444444444, |
|
"eval_loss": 0.7613236904144287, |
|
"eval_runtime": 6.9925, |
|
"eval_samples_per_second": 70.79, |
|
"eval_steps_per_second": 8.867, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 18.313573837280273, |
|
"learning_rate": 1.4444444444444446e-05, |
|
"loss": 0.5397, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9434343434343434, |
|
"eval_loss": 0.4086134731769562, |
|
"eval_runtime": 6.411, |
|
"eval_samples_per_second": 77.211, |
|
"eval_steps_per_second": 9.671, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 16.45941162109375, |
|
"learning_rate": 1.1666666666666668e-05, |
|
"loss": 0.2381, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.98989898989899, |
|
"eval_loss": 0.20247994363307953, |
|
"eval_runtime": 7.3649, |
|
"eval_samples_per_second": 67.21, |
|
"eval_steps_per_second": 8.418, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 10.92023754119873, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.1152, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.98989898989899, |
|
"eval_loss": 0.11596124619245529, |
|
"eval_runtime": 6.844, |
|
"eval_samples_per_second": 72.327, |
|
"eval_steps_per_second": 9.059, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 9.070709228515625, |
|
"learning_rate": 6.111111111111112e-06, |
|
"loss": 0.058, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.07251805812120438, |
|
"eval_runtime": 6.6661, |
|
"eval_samples_per_second": 74.256, |
|
"eval_steps_per_second": 9.301, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 8.501797676086426, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0392, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.98989898989899, |
|
"eval_loss": 0.06781990826129913, |
|
"eval_runtime": 6.4896, |
|
"eval_samples_per_second": 76.276, |
|
"eval_steps_per_second": 9.554, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 1.17595374584198, |
|
"learning_rate": 5.555555555555555e-07, |
|
"loss": 0.026, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9959595959595959, |
|
"eval_loss": 0.04437047615647316, |
|
"eval_runtime": 6.7537, |
|
"eval_samples_per_second": 73.293, |
|
"eval_steps_per_second": 9.18, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"step": 80, |
|
"total_flos": 7.012786101918106e+17, |
|
"train_loss": 0.44722552597522736, |
|
"train_runtime": 549.2596, |
|
"train_samples_per_second": 5.826, |
|
"train_steps_per_second": 0.146 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 80, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 2 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.012786101918106e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|