[
  {
    "loss": 2.0071,
    "grad_norm": 0.4352966547012329,
    "learning_rate": 8.923190911336132e-5,
    "epoch": 0.2218976306523778,
    "step": 451
  },
  {
    "eval_loss": 1.9273011684417725,
    "eval_runtime": 896.6751,
    "eval_samples_per_second": 32.238,
    "eval_steps_per_second": 8.06,
    "epoch": 0.2218976306523778,
    "step": 451
  },
  {
    "loss": 1.9032,
    "grad_norm": 0.43635115027427673,
    "learning_rate": 7.809335638429242e-5,
    "epoch": 0.4437952613047556,
    "step": 902
  },
  {
    "eval_loss": 1.8892905712127686,
    "eval_runtime": 896.4291,
    "eval_samples_per_second": 32.247,
    "eval_steps_per_second": 8.062,
    "epoch": 0.4437952613047556,
    "step": 902
  },
  {
    "loss": 1.8749,
    "grad_norm": 0.4370776116847992,
    "learning_rate": 6.695480365522352e-5,
    "epoch": 0.6656928919571334,
    "step": 1353
  },
  {
    "eval_loss": 1.866470217704773,
    "eval_runtime": 899.5289,
    "eval_samples_per_second": 32.136,
    "eval_steps_per_second": 8.034,
    "epoch": 0.6656928919571334,
    "step": 1353
  },
  {
    "loss": 1.8578,
    "grad_norm": 0.45984092354774475,
    "learning_rate": 5.581625092615461e-5,
    "epoch": 0.8875905226095112,
    "step": 1804
  },
  {
    "eval_loss": 1.850355863571167,
    "eval_runtime": 899.8612,
    "eval_samples_per_second": 32.124,
    "eval_steps_per_second": 8.031,
    "epoch": 0.8875905226095112,
    "step": 1804
  },
  {
    "loss": 1.8145,
    "grad_norm": 0.4957409203052521,
    "learning_rate": 4.4677698197085704e-5,
    "epoch": 1.109488153261889,
    "step": 2255
  },
  {
    "eval_loss": 1.8413817882537842,
    "eval_runtime": 896.594,
    "eval_samples_per_second": 32.241,
    "eval_steps_per_second": 8.061,
    "epoch": 1.109488153261889,
    "step": 2255
  },
  {
    "loss": 1.7791,
    "grad_norm": 0.5043504238128662,
    "learning_rate": 3.3539145468016795e-5,
    "epoch": 1.3313857839142669,
    "step": 2706
  },
  {
    "eval_loss": 1.8332940340042114,
    "eval_runtime": 898.6194,
    "eval_samples_per_second": 32.168,
    "eval_steps_per_second": 8.042,
    "epoch": 1.3313857839142669,
    "step": 2706
  },
  {
    "loss": 1.7728,
    "grad_norm": 0.5073263049125671,
    "learning_rate": 2.240059273894789e-5,
    "epoch": 1.5532834145666445,
    "step": 3157
  },
  {
    "eval_loss": 1.8273794651031494,
    "eval_runtime": 899.231,
    "eval_samples_per_second": 32.146,
    "eval_steps_per_second": 8.037,
    "epoch": 1.5532834145666445,
    "step": 3157
  },
  {
    "loss": 1.7671,
    "grad_norm": 0.5197569131851196,
    "learning_rate": 1.1262040009878982e-5,
    "epoch": 1.7751810452190224,
    "step": 3608
  },
  {
    "eval_loss": 1.8234010934829712,
    "eval_runtime": 899.3422,
    "eval_samples_per_second": 32.142,
    "eval_steps_per_second": 8.036,
    "epoch": 1.7751810452190224,
    "step": 3608
  },
  {
    "loss": 1.7679,
    "grad_norm": 0.5240359306335449,
    "learning_rate": 1.2348728081007656e-7,
    "epoch": 1.9970786758714003,
    "step": 4059
  },
  {
    "eval_loss": 1.8216350078582764,
    "eval_runtime": 897.3741,
    "eval_samples_per_second": 32.213,
    "eval_steps_per_second": 8.053,
    "epoch": 1.9970786758714003,
    "step": 4059
  },
  {
    "train_runtime": 62857.4586,
    "train_samples_per_second": 8.278,
    "train_steps_per_second": 0.065,
    "total_flos": 7.055063070229955e18,
    "train_loss": 1.8381839976536007,
    "epoch": 1.9995387382954841,
    "step": 4064
  }
]