{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 16.0,
  "eval_steps": 500,
  "global_step": 21008,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.38,
      "learning_rate": 0.0009873064229499874,
      "loss": 2.3443,
      "step": 500
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.0009746128458999747,
      "loss": 1.6863,
      "step": 1000
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.575725640796642,
      "eval_loss": 1.955936312675476,
      "eval_runtime": 2249.8295,
      "eval_samples_per_second": 74.652,
      "eval_steps_per_second": 0.146,
      "step": 1313
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.0009619192688499619,
      "loss": 1.4821,
      "step": 1500
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.0009492256917999492,
      "loss": 1.3492,
      "step": 2000
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.0009365321147499366,
      "loss": 1.3275,
      "step": 2500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.606090917209967,
      "eval_loss": 1.8276444673538208,
      "eval_runtime": 1553.5911,
      "eval_samples_per_second": 108.108,
      "eval_steps_per_second": 0.212,
      "step": 2626
    },
    {
      "epoch": 2.28,
      "learning_rate": 0.0009238385376999238,
      "loss": 1.1767,
      "step": 3000
    },
    {
      "epoch": 2.67,
      "learning_rate": 0.0009111449606499112,
      "loss": 1.151,
      "step": 3500
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.6130332529546605,
      "eval_loss": 1.885665774345398,
      "eval_runtime": 1611.9735,
      "eval_samples_per_second": 104.192,
      "eval_steps_per_second": 0.204,
      "step": 3939
    },
    {
      "epoch": 3.05,
      "learning_rate": 0.0008984513835998985,
      "loss": 1.1393,
      "step": 4000
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.0008857578065498858,
      "loss": 1.0116,
      "step": 4500
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.0008730642294998731,
      "loss": 1.0336,
      "step": 5000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.6322288708284958,
      "eval_loss": 1.8160221576690674,
      "eval_runtime": 1568.2238,
      "eval_samples_per_second": 107.099,
      "eval_steps_per_second": 0.21,
      "step": 5252
    },
    {
      "epoch": 4.19,
      "learning_rate": 0.0008603706524498604,
      "loss": 0.9722,
      "step": 5500
    },
    {
      "epoch": 4.57,
      "learning_rate": 0.0008476770753998477,
      "loss": 0.9309,
      "step": 6000
    },
    {
      "epoch": 4.95,
      "learning_rate": 0.000834983498349835,
      "loss": 0.947,
      "step": 6500
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.6317465987913429,
      "eval_loss": 1.8050605058670044,
      "eval_runtime": 1542.405,
      "eval_samples_per_second": 108.892,
      "eval_steps_per_second": 0.213,
      "step": 6565
    },
    {
      "epoch": 5.33,
      "learning_rate": 0.0008222899212998223,
      "loss": 0.8427,
      "step": 7000
    },
    {
      "epoch": 5.71,
      "learning_rate": 0.0008095963442498096,
      "loss": 0.8595,
      "step": 7500
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.6442737638057813,
      "eval_loss": 1.7996306419372559,
      "eval_runtime": 1561.8382,
      "eval_samples_per_second": 107.537,
      "eval_steps_per_second": 0.211,
      "step": 7878
    },
    {
      "epoch": 6.09,
      "learning_rate": 0.0007969027671997969,
      "loss": 0.8393,
      "step": 8000
    },
    {
      "epoch": 6.47,
      "learning_rate": 0.0007842091901497842,
      "loss": 0.7719,
      "step": 8500
    },
    {
      "epoch": 6.85,
      "learning_rate": 0.0007715156130997715,
      "loss": 0.801,
      "step": 9000
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.6533952546813134,
      "eval_loss": 1.7987133264541626,
      "eval_runtime": 1549.5067,
      "eval_samples_per_second": 108.393,
      "eval_steps_per_second": 0.212,
      "step": 9191
    },
    {
      "epoch": 7.24,
      "learning_rate": 0.0007588220360497588,
      "loss": 0.7443,
      "step": 9500
    },
    {
      "epoch": 7.62,
      "learning_rate": 0.0007461284589997462,
      "loss": 0.7296,
      "step": 10000
    },
    {
      "epoch": 8.0,
      "learning_rate": 0.0007334348819497334,
      "loss": 0.7508,
      "step": 10500
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.6521925515763151,
      "eval_loss": 1.7864270210266113,
      "eval_runtime": 1567.4501,
      "eval_samples_per_second": 107.152,
      "eval_steps_per_second": 0.21,
      "step": 10504
    },
    {
      "epoch": 8.38,
      "learning_rate": 0.0007207413048997208,
      "loss": 0.6659,
      "step": 11000
    },
    {
      "epoch": 8.76,
      "learning_rate": 0.0007080477278497081,
      "loss": 0.694,
      "step": 11500
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.6525795600011908,
      "eval_loss": 1.8871492147445679,
      "eval_runtime": 1630.371,
      "eval_samples_per_second": 103.016,
      "eval_steps_per_second": 0.202,
      "step": 11817
    },
    {
      "epoch": 9.14,
      "learning_rate": 0.0006953541507996953,
      "loss": 0.6611,
      "step": 12000
    },
    {
      "epoch": 9.52,
      "learning_rate": 0.0006826605737496827,
      "loss": 0.6326,
      "step": 12500
    },
    {
      "epoch": 9.9,
      "learning_rate": 0.00066996699669967,
      "loss": 0.6523,
      "step": 13000
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.6648149802030305,
      "eval_loss": 1.805672287940979,
      "eval_runtime": 1557.784,
      "eval_samples_per_second": 107.817,
      "eval_steps_per_second": 0.211,
      "step": 13130
    },
    {
      "epoch": 10.28,
      "learning_rate": 0.0006572734196496572,
      "loss": 0.602,
      "step": 13500
    },
    {
      "epoch": 10.66,
      "learning_rate": 0.0006445798425996446,
      "loss": 0.5976,
      "step": 14000
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.6706736923580721,
      "eval_loss": 1.8513981103897095,
      "eval_runtime": 1548.9015,
      "eval_samples_per_second": 108.435,
      "eval_steps_per_second": 0.212,
      "step": 14443
    },
    {
      "epoch": 11.04,
      "learning_rate": 0.0006318862655496319,
      "loss": 0.6049,
      "step": 14500
    },
    {
      "epoch": 11.42,
      "learning_rate": 0.0006191926884996192,
      "loss": 0.5476,
      "step": 15000
    },
    {
      "epoch": 11.81,
      "learning_rate": 0.0006064991114496065,
      "loss": 0.5743,
      "step": 15500
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.6628501681998155,
      "eval_loss": 1.927115559577942,
      "eval_runtime": 1525.7141,
      "eval_samples_per_second": 110.083,
      "eval_steps_per_second": 0.216,
      "step": 15756
    },
    {
      "epoch": 12.19,
      "learning_rate": 0.0005938055343995938,
      "loss": 0.5441,
      "step": 16000
    },
    {
      "epoch": 12.57,
      "learning_rate": 0.0005811119573495812,
      "loss": 0.5232,
      "step": 16500
    },
    {
      "epoch": 12.95,
      "learning_rate": 0.0005684183802995684,
      "loss": 0.5426,
      "step": 17000
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.6691911523920098,
      "eval_loss": 1.9220695495605469,
      "eval_runtime": 1530.5139,
      "eval_samples_per_second": 109.738,
      "eval_steps_per_second": 0.215,
      "step": 17069
    },
    {
      "epoch": 13.33,
      "learning_rate": 0.0005557248032495558,
      "loss": 0.5004,
      "step": 17500
    },
    {
      "epoch": 13.71,
      "learning_rate": 0.0005430312261995431,
      "loss": 0.5092,
      "step": 18000
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.6751987139412342,
      "eval_loss": 1.9163544178009033,
      "eval_runtime": 1663.4966,
      "eval_samples_per_second": 100.965,
      "eval_steps_per_second": 0.198,
      "step": 18382
    },
    {
      "epoch": 14.09,
      "learning_rate": 0.0005303376491495303,
      "loss": 0.5017,
      "step": 18500
    },
    {
      "epoch": 14.47,
      "learning_rate": 0.0005176440720995177,
      "loss": 0.4729,
      "step": 19000
    },
    {
      "epoch": 14.85,
      "learning_rate": 0.000504950495049505,
      "loss": 0.4808,
      "step": 19500
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.6743115715519038,
      "eval_loss": 1.925864815711975,
      "eval_runtime": 1533.1814,
      "eval_samples_per_second": 109.547,
      "eval_steps_per_second": 0.215,
      "step": 19695
    },
    {
      "epoch": 15.23,
      "learning_rate": 0.0004922569179994923,
      "loss": 0.4595,
      "step": 20000
    },
    {
      "epoch": 15.61,
      "learning_rate": 0.00047956334094947955,
      "loss": 0.4489,
      "step": 20500
    },
    {
      "epoch": 15.99,
      "learning_rate": 0.00046686976389946687,
      "loss": 0.4611,
      "step": 21000
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.6784972165163288,
      "eval_loss": 1.9857261180877686,
      "eval_runtime": 1537.6389,
      "eval_samples_per_second": 109.229,
      "eval_steps_per_second": 0.214,
      "step": 21008
    },
    {
      "epoch": 16.0,
      "step": 21008,
      "total_flos": 7.331020593786202e+20,
      "train_loss": 0.0,
      "train_runtime": 372.6046,
      "train_samples_per_second": 27045.44,
      "train_steps_per_second": 52.858
    }
  ],
  "logging_steps": 500,
  "max_steps": 19695,
  "num_train_epochs": 15,
  "save_steps": 500,
  "total_flos": 7.331020593786202e+20,
  "trial_name": null,
  "trial_params": null
}