|
{ |
|
"best_metric": 0.8174300254452926, |
|
"best_model_checkpoint": "training_sentiment_analysis/checkpoint-8600", |
|
"epoch": 20.0, |
|
"eval_steps": 200, |
|
"global_step": 18680, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.3381836414337158, |
|
"learning_rate": 3.2119914346895075e-05, |
|
"loss": 0.9299, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.638676844783715, |
|
"eval_loss": 0.827367901802063, |
|
"eval_runtime": 3.055, |
|
"eval_samples_per_second": 514.569, |
|
"eval_steps_per_second": 16.367, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.0220164060592651, |
|
"learning_rate": 6.423982869379015e-05, |
|
"loss": 0.7793, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.7188295165394402, |
|
"eval_loss": 0.6643335223197937, |
|
"eval_runtime": 3.0013, |
|
"eval_samples_per_second": 523.77, |
|
"eval_steps_per_second": 16.659, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.7421491146087646, |
|
"learning_rate": 9.635974304068522e-05, |
|
"loss": 0.6574, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.7659033078880407, |
|
"eval_loss": 0.5868020057678223, |
|
"eval_runtime": 2.9749, |
|
"eval_samples_per_second": 528.422, |
|
"eval_steps_per_second": 16.807, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.8133894205093384, |
|
"learning_rate": 0.0001284796573875803, |
|
"loss": 0.6132, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.772264631043257, |
|
"eval_loss": 0.5582301616668701, |
|
"eval_runtime": 2.9908, |
|
"eval_samples_per_second": 525.617, |
|
"eval_steps_per_second": 16.718, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 1.3071078062057495, |
|
"learning_rate": 0.00016059957173447537, |
|
"loss": 0.5791, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_accuracy": 0.7830788804071247, |
|
"eval_loss": 0.5515692234039307, |
|
"eval_runtime": 2.9665, |
|
"eval_samples_per_second": 529.915, |
|
"eval_steps_per_second": 16.855, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.0445743799209595, |
|
"learning_rate": 0.00019271948608137044, |
|
"loss": 0.554, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_accuracy": 0.7964376590330788, |
|
"eval_loss": 0.5187413692474365, |
|
"eval_runtime": 2.9846, |
|
"eval_samples_per_second": 526.705, |
|
"eval_steps_per_second": 16.753, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.0763362646102905, |
|
"learning_rate": 0.0002248394004282655, |
|
"loss": 0.5258, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_accuracy": 0.8034351145038168, |
|
"eval_loss": 0.5125576257705688, |
|
"eval_runtime": 2.9831, |
|
"eval_samples_per_second": 526.967, |
|
"eval_steps_per_second": 16.761, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 0.8554897308349609, |
|
"learning_rate": 0.0002569593147751606, |
|
"loss": 0.5373, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_accuracy": 0.8002544529262087, |
|
"eval_loss": 0.51680988073349, |
|
"eval_runtime": 2.9726, |
|
"eval_samples_per_second": 528.823, |
|
"eval_steps_per_second": 16.82, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 1.538806438446045, |
|
"learning_rate": 0.0002890792291220556, |
|
"loss": 0.5266, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_accuracy": 0.8027989821882952, |
|
"eval_loss": 0.5283887982368469, |
|
"eval_runtime": 2.9766, |
|
"eval_samples_per_second": 528.12, |
|
"eval_steps_per_second": 16.798, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 1.1234441995620728, |
|
"learning_rate": 0.000297644539614561, |
|
"loss": 0.5076, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.7977099236641222, |
|
"eval_loss": 0.5178301334381104, |
|
"eval_runtime": 2.9829, |
|
"eval_samples_per_second": 526.996, |
|
"eval_steps_per_second": 16.762, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 1.6212774515151978, |
|
"learning_rate": 0.0002940756602426838, |
|
"loss": 0.5094, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.8027989821882952, |
|
"eval_loss": 0.5134572982788086, |
|
"eval_runtime": 2.981, |
|
"eval_samples_per_second": 527.334, |
|
"eval_steps_per_second": 16.773, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 1.4514294862747192, |
|
"learning_rate": 0.00029050678087080655, |
|
"loss": 0.5032, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_accuracy": 0.8104325699745547, |
|
"eval_loss": 0.5022692084312439, |
|
"eval_runtime": 2.963, |
|
"eval_samples_per_second": 530.535, |
|
"eval_steps_per_second": 16.875, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 1.826932668685913, |
|
"learning_rate": 0.0002869379014989293, |
|
"loss": 0.5034, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_accuracy": 0.80470737913486, |
|
"eval_loss": 0.5088226199150085, |
|
"eval_runtime": 2.9831, |
|
"eval_samples_per_second": 526.969, |
|
"eval_steps_per_second": 16.761, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.4404336214065552, |
|
"learning_rate": 0.0002833690221270521, |
|
"loss": 0.4923, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.799618320610687, |
|
"eval_loss": 0.5219257473945618, |
|
"eval_runtime": 2.9722, |
|
"eval_samples_per_second": 528.9, |
|
"eval_steps_per_second": 16.823, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"grad_norm": 0.8795878291130066, |
|
"learning_rate": 0.00027980014275517484, |
|
"loss": 0.4934, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_accuracy": 0.8129770992366412, |
|
"eval_loss": 0.4905295968055725, |
|
"eval_runtime": 2.9734, |
|
"eval_samples_per_second": 528.696, |
|
"eval_steps_per_second": 16.816, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"grad_norm": 1.6092537641525269, |
|
"learning_rate": 0.0002762312633832976, |
|
"loss": 0.4798, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"eval_accuracy": 0.8097964376590331, |
|
"eval_loss": 0.4907812178134918, |
|
"eval_runtime": 2.9897, |
|
"eval_samples_per_second": 525.803, |
|
"eval_steps_per_second": 16.724, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"grad_norm": 1.6475110054016113, |
|
"learning_rate": 0.0002726623840114204, |
|
"loss": 0.4831, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_accuracy": 0.8072519083969466, |
|
"eval_loss": 0.48748457431793213, |
|
"eval_runtime": 2.9694, |
|
"eval_samples_per_second": 529.396, |
|
"eval_steps_per_second": 16.838, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"grad_norm": 1.1669467687606812, |
|
"learning_rate": 0.00026909350463954313, |
|
"loss": 0.4707, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"eval_accuracy": 0.8072519083969466, |
|
"eval_loss": 0.4985896944999695, |
|
"eval_runtime": 2.991, |
|
"eval_samples_per_second": 525.579, |
|
"eval_steps_per_second": 16.717, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"grad_norm": 0.9440352320671082, |
|
"learning_rate": 0.00026552462526766593, |
|
"loss": 0.4674, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"eval_accuracy": 0.8104325699745547, |
|
"eval_loss": 0.5195557475090027, |
|
"eval_runtime": 2.9789, |
|
"eval_samples_per_second": 527.711, |
|
"eval_steps_per_second": 16.785, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"grad_norm": 1.8151628971099854, |
|
"learning_rate": 0.0002619557458957887, |
|
"loss": 0.4535, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_accuracy": 0.8097964376590331, |
|
"eval_loss": 0.4896373152732849, |
|
"eval_runtime": 2.9869, |
|
"eval_samples_per_second": 526.295, |
|
"eval_steps_per_second": 16.74, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"grad_norm": 3.0790090560913086, |
|
"learning_rate": 0.0002583868665239115, |
|
"loss": 0.464, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_accuracy": 0.8078880407124682, |
|
"eval_loss": 0.517495334148407, |
|
"eval_runtime": 2.9986, |
|
"eval_samples_per_second": 524.246, |
|
"eval_steps_per_second": 16.674, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"grad_norm": 1.1520639657974243, |
|
"learning_rate": 0.0002548179871520343, |
|
"loss": 0.4715, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"eval_accuracy": 0.8027989821882952, |
|
"eval_loss": 0.5001667737960815, |
|
"eval_runtime": 2.9723, |
|
"eval_samples_per_second": 528.885, |
|
"eval_steps_per_second": 16.822, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"grad_norm": 0.8184943795204163, |
|
"learning_rate": 0.000251249107780157, |
|
"loss": 0.468, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"eval_accuracy": 0.8110687022900763, |
|
"eval_loss": 0.4883332848548889, |
|
"eval_runtime": 2.9769, |
|
"eval_samples_per_second": 528.068, |
|
"eval_steps_per_second": 16.796, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"grad_norm": 1.155013084411621, |
|
"learning_rate": 0.00024768022840827977, |
|
"loss": 0.4645, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"eval_accuracy": 0.8040712468193384, |
|
"eval_loss": 0.5186554789543152, |
|
"eval_runtime": 2.9698, |
|
"eval_samples_per_second": 529.333, |
|
"eval_steps_per_second": 16.836, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"grad_norm": 1.6959339380264282, |
|
"learning_rate": 0.00024411134903640257, |
|
"loss": 0.445, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"eval_accuracy": 0.806615776081425, |
|
"eval_loss": 0.4928103983402252, |
|
"eval_runtime": 2.9782, |
|
"eval_samples_per_second": 527.83, |
|
"eval_steps_per_second": 16.789, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"grad_norm": 1.0461735725402832, |
|
"learning_rate": 0.00024054246966452532, |
|
"loss": 0.4558, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_accuracy": 0.8078880407124682, |
|
"eval_loss": 0.48704999685287476, |
|
"eval_runtime": 2.9838, |
|
"eval_samples_per_second": 526.839, |
|
"eval_steps_per_second": 16.757, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"grad_norm": 0.9599233269691467, |
|
"learning_rate": 0.00023697359029264806, |
|
"loss": 0.4405, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"eval_accuracy": 0.8104325699745547, |
|
"eval_loss": 0.4985482692718506, |
|
"eval_runtime": 3.0065, |
|
"eval_samples_per_second": 522.862, |
|
"eval_steps_per_second": 16.63, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.4131615161895752, |
|
"learning_rate": 0.00023340471092077086, |
|
"loss": 0.4648, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8059796437659033, |
|
"eval_loss": 0.48415422439575195, |
|
"eval_runtime": 2.9786, |
|
"eval_samples_per_second": 527.759, |
|
"eval_steps_per_second": 16.786, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"grad_norm": 1.189572811126709, |
|
"learning_rate": 0.0002298358315488936, |
|
"loss": 0.435, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"eval_accuracy": 0.811704834605598, |
|
"eval_loss": 0.4911487102508545, |
|
"eval_runtime": 2.9997, |
|
"eval_samples_per_second": 524.044, |
|
"eval_steps_per_second": 16.668, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"grad_norm": 1.5198345184326172, |
|
"learning_rate": 0.00022626695217701638, |
|
"loss": 0.437, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"eval_accuracy": 0.8085241730279898, |
|
"eval_loss": 0.48542749881744385, |
|
"eval_runtime": 3.0042, |
|
"eval_samples_per_second": 523.274, |
|
"eval_steps_per_second": 16.644, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"grad_norm": 1.1990240812301636, |
|
"learning_rate": 0.00022269807280513918, |
|
"loss": 0.4588, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"eval_accuracy": 0.8085241730279898, |
|
"eval_loss": 0.48791924118995667, |
|
"eval_runtime": 3.0014, |
|
"eval_samples_per_second": 523.758, |
|
"eval_steps_per_second": 16.659, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"grad_norm": 1.346658706665039, |
|
"learning_rate": 0.00021912919343326193, |
|
"loss": 0.4342, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"eval_accuracy": 0.8104325699745547, |
|
"eval_loss": 0.49220582842826843, |
|
"eval_runtime": 3.0046, |
|
"eval_samples_per_second": 523.193, |
|
"eval_steps_per_second": 16.641, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"grad_norm": 1.8644700050354004, |
|
"learning_rate": 0.00021556031406138473, |
|
"loss": 0.4347, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"eval_accuracy": 0.8142493638676844, |
|
"eval_loss": 0.49111655354499817, |
|
"eval_runtime": 2.985, |
|
"eval_samples_per_second": 526.634, |
|
"eval_steps_per_second": 16.75, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"grad_norm": 1.9364045858383179, |
|
"learning_rate": 0.00021199143468950748, |
|
"loss": 0.4326, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"eval_accuracy": 0.8078880407124682, |
|
"eval_loss": 0.491384893655777, |
|
"eval_runtime": 2.9723, |
|
"eval_samples_per_second": 528.882, |
|
"eval_steps_per_second": 16.822, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"grad_norm": 0.9911957383155823, |
|
"learning_rate": 0.00020842255531763022, |
|
"loss": 0.4267, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"eval_accuracy": 0.8104325699745547, |
|
"eval_loss": 0.4917159080505371, |
|
"eval_runtime": 2.9808, |
|
"eval_samples_per_second": 527.373, |
|
"eval_steps_per_second": 16.774, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"grad_norm": 1.2186638116836548, |
|
"learning_rate": 0.00020485367594575302, |
|
"loss": 0.4241, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"eval_accuracy": 0.8136132315521628, |
|
"eval_loss": 0.4887010455131531, |
|
"eval_runtime": 2.9872, |
|
"eval_samples_per_second": 526.253, |
|
"eval_steps_per_second": 16.738, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"grad_norm": 1.1467108726501465, |
|
"learning_rate": 0.0002012847965738758, |
|
"loss": 0.4376, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"eval_accuracy": 0.8078880407124682, |
|
"eval_loss": 0.5122085213661194, |
|
"eval_runtime": 2.9829, |
|
"eval_samples_per_second": 527.007, |
|
"eval_steps_per_second": 16.762, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"grad_norm": 0.8427834510803223, |
|
"learning_rate": 0.00019771591720199854, |
|
"loss": 0.4323, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"eval_accuracy": 0.8097964376590331, |
|
"eval_loss": 0.49093857407569885, |
|
"eval_runtime": 2.9738, |
|
"eval_samples_per_second": 528.625, |
|
"eval_steps_per_second": 16.814, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"grad_norm": 1.2060902118682861, |
|
"learning_rate": 0.00019414703783012134, |
|
"loss": 0.4264, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"eval_accuracy": 0.8142493638676844, |
|
"eval_loss": 0.48821595311164856, |
|
"eval_runtime": 2.9836, |
|
"eval_samples_per_second": 526.88, |
|
"eval_steps_per_second": 16.758, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"grad_norm": 1.7033394575119019, |
|
"learning_rate": 0.0001905781584582441, |
|
"loss": 0.4175, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"eval_accuracy": 0.8053435114503816, |
|
"eval_loss": 0.5090692043304443, |
|
"eval_runtime": 2.9978, |
|
"eval_samples_per_second": 524.393, |
|
"eval_steps_per_second": 16.679, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"grad_norm": 1.3033976554870605, |
|
"learning_rate": 0.0001870092790863669, |
|
"loss": 0.4228, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"eval_accuracy": 0.8097964376590331, |
|
"eval_loss": 0.5060204863548279, |
|
"eval_runtime": 2.9975, |
|
"eval_samples_per_second": 524.436, |
|
"eval_steps_per_second": 16.681, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"grad_norm": 1.2635438442230225, |
|
"learning_rate": 0.00018344039971448964, |
|
"loss": 0.4189, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.8091603053435115, |
|
"eval_loss": 0.4940575361251831, |
|
"eval_runtime": 2.9634, |
|
"eval_samples_per_second": 530.468, |
|
"eval_steps_per_second": 16.872, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"grad_norm": 1.496982455253601, |
|
"learning_rate": 0.0001798715203426124, |
|
"loss": 0.4161, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"eval_accuracy": 0.8174300254452926, |
|
"eval_loss": 0.5010442137718201, |
|
"eval_runtime": 2.973, |
|
"eval_samples_per_second": 528.758, |
|
"eval_steps_per_second": 16.818, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"grad_norm": 1.355362892150879, |
|
"learning_rate": 0.00017630264097073518, |
|
"loss": 0.4078, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"eval_accuracy": 0.8078880407124682, |
|
"eval_loss": 0.4949406683444977, |
|
"eval_runtime": 2.9901, |
|
"eval_samples_per_second": 525.736, |
|
"eval_steps_per_second": 16.722, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"grad_norm": 1.180076241493225, |
|
"learning_rate": 0.00017273376159885795, |
|
"loss": 0.4201, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"eval_accuracy": 0.8072519083969466, |
|
"eval_loss": 0.5017107129096985, |
|
"eval_runtime": 2.952, |
|
"eval_samples_per_second": 532.525, |
|
"eval_steps_per_second": 16.938, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"grad_norm": 1.1020286083221436, |
|
"learning_rate": 0.0001691648822269807, |
|
"loss": 0.4141, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"eval_accuracy": 0.8091603053435115, |
|
"eval_loss": 0.4984731078147888, |
|
"eval_runtime": 2.9633, |
|
"eval_samples_per_second": 530.497, |
|
"eval_steps_per_second": 16.873, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"grad_norm": 1.2666047811508179, |
|
"learning_rate": 0.0001655960028551035, |
|
"loss": 0.4132, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"eval_accuracy": 0.8053435114503816, |
|
"eval_loss": 0.5031649470329285, |
|
"eval_runtime": 2.9822, |
|
"eval_samples_per_second": 527.133, |
|
"eval_steps_per_second": 16.766, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"grad_norm": 0.6767197251319885, |
|
"learning_rate": 0.00016202712348322625, |
|
"loss": 0.4043, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"eval_accuracy": 0.8129770992366412, |
|
"eval_loss": 0.5038406848907471, |
|
"eval_runtime": 2.9816, |
|
"eval_samples_per_second": 527.24, |
|
"eval_steps_per_second": 16.77, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"grad_norm": 1.147275447845459, |
|
"learning_rate": 0.00015845824411134902, |
|
"loss": 0.4187, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"eval_accuracy": 0.8104325699745547, |
|
"eval_loss": 0.4981047213077545, |
|
"eval_runtime": 2.9858, |
|
"eval_samples_per_second": 526.485, |
|
"eval_steps_per_second": 16.746, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"grad_norm": 1.6172677278518677, |
|
"learning_rate": 0.0001548893647394718, |
|
"loss": 0.3827, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"eval_accuracy": 0.8072519083969466, |
|
"eval_loss": 0.5126467943191528, |
|
"eval_runtime": 2.9825, |
|
"eval_samples_per_second": 527.072, |
|
"eval_steps_per_second": 16.764, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 10.92, |
|
"grad_norm": 1.8639923334121704, |
|
"learning_rate": 0.00015132048536759457, |
|
"loss": 0.4074, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 10.92, |
|
"eval_accuracy": 0.8072519083969466, |
|
"eval_loss": 0.5088323950767517, |
|
"eval_runtime": 2.9816, |
|
"eval_samples_per_second": 527.237, |
|
"eval_steps_per_second": 16.77, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"grad_norm": 1.2519667148590088, |
|
"learning_rate": 0.00014775160599571734, |
|
"loss": 0.4013, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 11.13, |
|
"eval_accuracy": 0.8072519083969466, |
|
"eval_loss": 0.5061373114585876, |
|
"eval_runtime": 2.9811, |
|
"eval_samples_per_second": 527.316, |
|
"eval_steps_per_second": 16.772, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"grad_norm": 1.1711052656173706, |
|
"learning_rate": 0.0001441827266238401, |
|
"loss": 0.3888, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"eval_accuracy": 0.8085241730279898, |
|
"eval_loss": 0.5013065338134766, |
|
"eval_runtime": 2.9847, |
|
"eval_samples_per_second": 526.681, |
|
"eval_steps_per_second": 16.752, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 11.56, |
|
"grad_norm": 1.8078001737594604, |
|
"learning_rate": 0.00014061384725196286, |
|
"loss": 0.3855, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 11.56, |
|
"eval_accuracy": 0.8059796437659033, |
|
"eval_loss": 0.4992610514163971, |
|
"eval_runtime": 2.9927, |
|
"eval_samples_per_second": 525.27, |
|
"eval_steps_per_second": 16.707, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"grad_norm": 1.1071592569351196, |
|
"learning_rate": 0.00013704496788008563, |
|
"loss": 0.3924, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"eval_accuracy": 0.8085241730279898, |
|
"eval_loss": 0.5075262188911438, |
|
"eval_runtime": 3.0066, |
|
"eval_samples_per_second": 522.844, |
|
"eval_steps_per_second": 16.63, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"grad_norm": 1.3704427480697632, |
|
"learning_rate": 0.0001334760885082084, |
|
"loss": 0.4046, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_accuracy": 0.8027989821882952, |
|
"eval_loss": 0.49990707635879517, |
|
"eval_runtime": 3.0049, |
|
"eval_samples_per_second": 523.149, |
|
"eval_steps_per_second": 16.64, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"grad_norm": 1.40170419216156, |
|
"learning_rate": 0.00012990720913633118, |
|
"loss": 0.3957, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"eval_accuracy": 0.8034351145038168, |
|
"eval_loss": 0.5089264512062073, |
|
"eval_runtime": 2.9942, |
|
"eval_samples_per_second": 525.011, |
|
"eval_steps_per_second": 16.699, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 12.42, |
|
"grad_norm": 1.1885521411895752, |
|
"learning_rate": 0.00012633832976445395, |
|
"loss": 0.381, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 12.42, |
|
"eval_accuracy": 0.8072519083969466, |
|
"eval_loss": 0.5207549929618835, |
|
"eval_runtime": 2.9746, |
|
"eval_samples_per_second": 528.479, |
|
"eval_steps_per_second": 16.809, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"grad_norm": 0.8873888254165649, |
|
"learning_rate": 0.00012276945039257673, |
|
"loss": 0.3906, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"eval_accuracy": 0.806615776081425, |
|
"eval_loss": 0.513671875, |
|
"eval_runtime": 2.961, |
|
"eval_samples_per_second": 530.901, |
|
"eval_steps_per_second": 16.886, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"grad_norm": 1.6491570472717285, |
|
"learning_rate": 0.0001192005710206995, |
|
"loss": 0.3734, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"eval_accuracy": 0.8040712468193384, |
|
"eval_loss": 0.5183374881744385, |
|
"eval_runtime": 2.9533, |
|
"eval_samples_per_second": 532.292, |
|
"eval_steps_per_second": 16.93, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"grad_norm": 2.042646884918213, |
|
"learning_rate": 0.00011563169164882227, |
|
"loss": 0.3928, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"eval_accuracy": 0.806615776081425, |
|
"eval_loss": 0.5069447159767151, |
|
"eval_runtime": 2.959, |
|
"eval_samples_per_second": 531.259, |
|
"eval_steps_per_second": 16.898, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"grad_norm": 0.817425549030304, |
|
"learning_rate": 0.00011206281227694502, |
|
"loss": 0.3774, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"eval_accuracy": 0.8008905852417303, |
|
"eval_loss": 0.5086419582366943, |
|
"eval_runtime": 2.9547, |
|
"eval_samples_per_second": 532.04, |
|
"eval_steps_per_second": 16.922, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 13.49, |
|
"grad_norm": 1.0988578796386719, |
|
"learning_rate": 0.0001084939329050678, |
|
"loss": 0.3892, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 13.49, |
|
"eval_accuracy": 0.8059796437659033, |
|
"eval_loss": 0.4966925382614136, |
|
"eval_runtime": 2.9538, |
|
"eval_samples_per_second": 532.194, |
|
"eval_steps_per_second": 16.927, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"grad_norm": 1.312321662902832, |
|
"learning_rate": 0.00010492505353319058, |
|
"loss": 0.372, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"eval_accuracy": 0.8040712468193384, |
|
"eval_loss": 0.5042534470558167, |
|
"eval_runtime": 2.9651, |
|
"eval_samples_per_second": 530.16, |
|
"eval_steps_per_second": 16.863, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 13.92, |
|
"grad_norm": 1.642741322517395, |
|
"learning_rate": 0.00010135617416131332, |
|
"loss": 0.388, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 13.92, |
|
"eval_accuracy": 0.8072519083969466, |
|
"eval_loss": 0.5095480680465698, |
|
"eval_runtime": 2.9526, |
|
"eval_samples_per_second": 532.404, |
|
"eval_steps_per_second": 16.934, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"grad_norm": 1.10377836227417, |
|
"learning_rate": 9.778729478943611e-05, |
|
"loss": 0.3754, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"eval_accuracy": 0.8021628498727735, |
|
"eval_loss": 0.5103972554206848, |
|
"eval_runtime": 2.9663, |
|
"eval_samples_per_second": 529.954, |
|
"eval_steps_per_second": 16.856, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"grad_norm": 1.1614229679107666, |
|
"learning_rate": 9.421841541755888e-05, |
|
"loss": 0.3639, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"eval_accuracy": 0.7983460559796438, |
|
"eval_loss": 0.5263165235519409, |
|
"eval_runtime": 2.9391, |
|
"eval_samples_per_second": 534.858, |
|
"eval_steps_per_second": 17.012, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 14.56, |
|
"grad_norm": 1.6049692630767822, |
|
"learning_rate": 9.064953604568166e-05, |
|
"loss": 0.3795, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 14.56, |
|
"eval_accuracy": 0.8015267175572519, |
|
"eval_loss": 0.5145931839942932, |
|
"eval_runtime": 2.9465, |
|
"eval_samples_per_second": 533.506, |
|
"eval_steps_per_second": 16.969, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"grad_norm": 2.813002347946167, |
|
"learning_rate": 8.708065667380442e-05, |
|
"loss": 0.3792, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"eval_accuracy": 0.8040712468193384, |
|
"eval_loss": 0.5066380500793457, |
|
"eval_runtime": 2.9409, |
|
"eval_samples_per_second": 534.523, |
|
"eval_steps_per_second": 17.001, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"grad_norm": 1.2670201063156128, |
|
"learning_rate": 8.351177730192719e-05, |
|
"loss": 0.3589, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.8078880407124682, |
|
"eval_loss": 0.5135853886604309, |
|
"eval_runtime": 2.962, |
|
"eval_samples_per_second": 530.717, |
|
"eval_steps_per_second": 16.88, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"grad_norm": 1.9681557416915894, |
|
"learning_rate": 7.994289793004996e-05, |
|
"loss": 0.3624, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"eval_accuracy": 0.8021628498727735, |
|
"eval_loss": 0.5237164497375488, |
|
"eval_runtime": 2.9535, |
|
"eval_samples_per_second": 532.245, |
|
"eval_steps_per_second": 16.929, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 15.42, |
|
"grad_norm": 1.8548041582107544, |
|
"learning_rate": 7.637401855817274e-05, |
|
"loss": 0.3659, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 15.42, |
|
"eval_accuracy": 0.8059796437659033, |
|
"eval_loss": 0.5165674090385437, |
|
"eval_runtime": 2.9482, |
|
"eval_samples_per_second": 533.2, |
|
"eval_steps_per_second": 16.959, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"grad_norm": 1.3727173805236816, |
|
"learning_rate": 7.28051391862955e-05, |
|
"loss": 0.3657, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"eval_accuracy": 0.8002544529262087, |
|
"eval_loss": 0.5177738070487976, |
|
"eval_runtime": 2.9451, |
|
"eval_samples_per_second": 533.764, |
|
"eval_steps_per_second": 16.977, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"grad_norm": 2.10198974609375, |
|
"learning_rate": 6.923625981441827e-05, |
|
"loss": 0.359, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"eval_accuracy": 0.7983460559796438, |
|
"eval_loss": 0.5152426362037659, |
|
"eval_runtime": 2.9473, |
|
"eval_samples_per_second": 533.372, |
|
"eval_steps_per_second": 16.965, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"grad_norm": 1.0453667640686035, |
|
"learning_rate": 6.566738044254104e-05, |
|
"loss": 0.3677, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"eval_accuracy": 0.8034351145038168, |
|
"eval_loss": 0.5211815237998962, |
|
"eval_runtime": 2.9478, |
|
"eval_samples_per_second": 533.274, |
|
"eval_steps_per_second": 16.962, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"grad_norm": 1.0645538568496704, |
|
"learning_rate": 6.20985010706638e-05, |
|
"loss": 0.3521, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"eval_accuracy": 0.8002544529262087, |
|
"eval_loss": 0.5323696732521057, |
|
"eval_runtime": 2.9594, |
|
"eval_samples_per_second": 531.197, |
|
"eval_steps_per_second": 16.896, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"grad_norm": 3.849015951156616, |
|
"learning_rate": 5.852962169878657e-05, |
|
"loss": 0.3589, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"eval_accuracy": 0.8040712468193384, |
|
"eval_loss": 0.5237988829612732, |
|
"eval_runtime": 2.9364, |
|
"eval_samples_per_second": 535.357, |
|
"eval_steps_per_second": 17.028, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"grad_norm": 1.3231987953186035, |
|
"learning_rate": 5.496074232690935e-05, |
|
"loss": 0.3695, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"eval_accuracy": 0.7977099236641222, |
|
"eval_loss": 0.511340320110321, |
|
"eval_runtime": 2.969, |
|
"eval_samples_per_second": 529.468, |
|
"eval_steps_per_second": 16.841, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 16.92, |
|
"grad_norm": 1.7709985971450806, |
|
"learning_rate": 5.139186295503211e-05, |
|
"loss": 0.3606, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 16.92, |
|
"eval_accuracy": 0.7983460559796438, |
|
"eval_loss": 0.5136662721633911, |
|
"eval_runtime": 2.9594, |
|
"eval_samples_per_second": 531.193, |
|
"eval_steps_per_second": 16.895, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 17.13, |
|
"grad_norm": 1.5108495950698853, |
|
"learning_rate": 4.782298358315489e-05, |
|
"loss": 0.3581, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 17.13, |
|
"eval_accuracy": 0.799618320610687, |
|
"eval_loss": 0.5130853056907654, |
|
"eval_runtime": 2.9611, |
|
"eval_samples_per_second": 530.882, |
|
"eval_steps_per_second": 16.886, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 17.34, |
|
"grad_norm": 1.3634617328643799, |
|
"learning_rate": 4.4254104211277655e-05, |
|
"loss": 0.3488, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 17.34, |
|
"eval_accuracy": 0.7989821882951654, |
|
"eval_loss": 0.5270070433616638, |
|
"eval_runtime": 2.9953, |
|
"eval_samples_per_second": 524.824, |
|
"eval_steps_per_second": 16.693, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 17.56, |
|
"grad_norm": 1.0239213705062866, |
|
"learning_rate": 4.068522483940043e-05, |
|
"loss": 0.3499, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 17.56, |
|
"eval_accuracy": 0.7964376590330788, |
|
"eval_loss": 0.523576021194458, |
|
"eval_runtime": 2.9356, |
|
"eval_samples_per_second": 535.502, |
|
"eval_steps_per_second": 17.033, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 17.77, |
|
"grad_norm": 1.108484148979187, |
|
"learning_rate": 3.7116345467523195e-05, |
|
"loss": 0.3603, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 17.77, |
|
"eval_accuracy": 0.8002544529262087, |
|
"eval_loss": 0.5186541080474854, |
|
"eval_runtime": 2.9666, |
|
"eval_samples_per_second": 529.891, |
|
"eval_steps_per_second": 16.854, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"grad_norm": 2.816092014312744, |
|
"learning_rate": 3.354746609564596e-05, |
|
"loss": 0.3578, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_accuracy": 0.8021628498727735, |
|
"eval_loss": 0.5223926901817322, |
|
"eval_runtime": 2.9355, |
|
"eval_samples_per_second": 535.521, |
|
"eval_steps_per_second": 17.033, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 18.2, |
|
"grad_norm": 1.5831489562988281, |
|
"learning_rate": 2.997858672376873e-05, |
|
"loss": 0.3449, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 18.2, |
|
"eval_accuracy": 0.7989821882951654, |
|
"eval_loss": 0.5227622389793396, |
|
"eval_runtime": 2.9602, |
|
"eval_samples_per_second": 531.048, |
|
"eval_steps_per_second": 16.891, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 18.42, |
|
"grad_norm": 1.0060327053070068, |
|
"learning_rate": 2.64097073518915e-05, |
|
"loss": 0.3418, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 18.42, |
|
"eval_accuracy": 0.8008905852417303, |
|
"eval_loss": 0.5287216901779175, |
|
"eval_runtime": 2.9537, |
|
"eval_samples_per_second": 532.21, |
|
"eval_steps_per_second": 16.928, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 18.63, |
|
"grad_norm": 1.8092093467712402, |
|
"learning_rate": 2.2840827980014274e-05, |
|
"loss": 0.3334, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 18.63, |
|
"eval_accuracy": 0.799618320610687, |
|
"eval_loss": 0.5322315096855164, |
|
"eval_runtime": 2.9745, |
|
"eval_samples_per_second": 528.484, |
|
"eval_steps_per_second": 16.809, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 18.84, |
|
"grad_norm": 1.4800430536270142, |
|
"learning_rate": 1.9271948608137044e-05, |
|
"loss": 0.3567, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 18.84, |
|
"eval_accuracy": 0.7983460559796438, |
|
"eval_loss": 0.5293812155723572, |
|
"eval_runtime": 2.9485, |
|
"eval_samples_per_second": 533.161, |
|
"eval_steps_per_second": 16.958, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 19.06, |
|
"grad_norm": 1.6271811723709106, |
|
"learning_rate": 1.5703069236259814e-05, |
|
"loss": 0.3541, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 19.06, |
|
"eval_accuracy": 0.8002544529262087, |
|
"eval_loss": 0.5250320434570312, |
|
"eval_runtime": 2.9479, |
|
"eval_samples_per_second": 533.268, |
|
"eval_steps_per_second": 16.961, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 19.27, |
|
"grad_norm": 0.7758527994155884, |
|
"learning_rate": 1.2134189864382584e-05, |
|
"loss": 0.365, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 19.27, |
|
"eval_accuracy": 0.7983460559796438, |
|
"eval_loss": 0.5246437788009644, |
|
"eval_runtime": 2.9363, |
|
"eval_samples_per_second": 535.369, |
|
"eval_steps_per_second": 17.028, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"grad_norm": 0.9722337126731873, |
|
"learning_rate": 8.565310492505352e-06, |
|
"loss": 0.337, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"eval_accuracy": 0.7977099236641222, |
|
"eval_loss": 0.527810275554657, |
|
"eval_runtime": 2.9383, |
|
"eval_samples_per_second": 535.006, |
|
"eval_steps_per_second": 17.017, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 19.7, |
|
"grad_norm": 1.5007203817367554, |
|
"learning_rate": 4.996431120628123e-06, |
|
"loss": 0.3301, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 19.7, |
|
"eval_accuracy": 0.7989821882951654, |
|
"eval_loss": 0.5283259153366089, |
|
"eval_runtime": 2.9603, |
|
"eval_samples_per_second": 531.035, |
|
"eval_steps_per_second": 16.89, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"grad_norm": 1.1220752000808716, |
|
"learning_rate": 1.4275517487508921e-06, |
|
"loss": 0.3421, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"eval_accuracy": 0.7977099236641222, |
|
"eval_loss": 0.5287136435508728, |
|
"eval_runtime": 2.9398, |
|
"eval_samples_per_second": 534.737, |
|
"eval_steps_per_second": 17.008, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 18680, |
|
"total_flos": 1.44512252251488e+16, |
|
"train_loss": 0.42864556159401346, |
|
"train_runtime": 2680.553, |
|
"train_samples_per_second": 222.82, |
|
"train_steps_per_second": 6.969 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 18680, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 200, |
|
"total_flos": 1.44512252251488e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|