kayrab's picture
Upload 2 files
133b351 verified
[
{
"loss": 2.0071,
"grad_norm": 0.4352966547012329,
"learning_rate": 8.923190911336132e-5,
"epoch": 0.2218976306523778,
"step": 451
},
{
"eval_loss": 1.9273011684417725,
"eval_runtime": 896.6751,
"eval_samples_per_second": 32.238,
"eval_steps_per_second": 8.06,
"epoch": 0.2218976306523778,
"step": 451
},
{
"loss": 1.9032,
"grad_norm": 0.43635115027427673,
"learning_rate": 7.809335638429242e-5,
"epoch": 0.4437952613047556,
"step": 902
},
{
"eval_loss": 1.8892905712127686,
"eval_runtime": 896.4291,
"eval_samples_per_second": 32.247,
"eval_steps_per_second": 8.062,
"epoch": 0.4437952613047556,
"step": 902
},
{
"loss": 1.8749,
"grad_norm": 0.4370776116847992,
"learning_rate": 6.695480365522352e-5,
"epoch": 0.6656928919571334,
"step": 1353
},
{
"eval_loss": 1.866470217704773,
"eval_runtime": 899.5289,
"eval_samples_per_second": 32.136,
"eval_steps_per_second": 8.034,
"epoch": 0.6656928919571334,
"step": 1353
},
{
"loss": 1.8578,
"grad_norm": 0.45984092354774475,
"learning_rate": 5.581625092615461e-5,
"epoch": 0.8875905226095112,
"step": 1804
},
{
"eval_loss": 1.850355863571167,
"eval_runtime": 899.8612,
"eval_samples_per_second": 32.124,
"eval_steps_per_second": 8.031,
"epoch": 0.8875905226095112,
"step": 1804
},
{
"loss": 1.8145,
"grad_norm": 0.4957409203052521,
"learning_rate": 4.4677698197085704e-5,
"epoch": 1.109488153261889,
"step": 2255
},
{
"eval_loss": 1.8413817882537842,
"eval_runtime": 896.594,
"eval_samples_per_second": 32.241,
"eval_steps_per_second": 8.061,
"epoch": 1.109488153261889,
"step": 2255
},
{
"loss": 1.7791,
"grad_norm": 0.5043504238128662,
"learning_rate": 3.3539145468016795e-5,
"epoch": 1.3313857839142669,
"step": 2706
},
{
"eval_loss": 1.8332940340042114,
"eval_runtime": 898.6194,
"eval_samples_per_second": 32.168,
"eval_steps_per_second": 8.042,
"epoch": 1.3313857839142669,
"step": 2706
},
{
"loss": 1.7728,
"grad_norm": 0.5073263049125671,
"learning_rate": 2.240059273894789e-5,
"epoch": 1.5532834145666445,
"step": 3157
},
{
"eval_loss": 1.8273794651031494,
"eval_runtime": 899.231,
"eval_samples_per_second": 32.146,
"eval_steps_per_second": 8.037,
"epoch": 1.5532834145666445,
"step": 3157
},
{
"loss": 1.7671,
"grad_norm": 0.5197569131851196,
"learning_rate": 1.1262040009878982e-5,
"epoch": 1.7751810452190224,
"step": 3608
},
{
"eval_loss": 1.8234010934829712,
"eval_runtime": 899.3422,
"eval_samples_per_second": 32.142,
"eval_steps_per_second": 8.036,
"epoch": 1.7751810452190224,
"step": 3608
},
{
"loss": 1.7679,
"grad_norm": 0.5240359306335449,
"learning_rate": 1.2348728081007656e-7,
"epoch": 1.9970786758714003,
"step": 4059
},
{
"eval_loss": 1.8216350078582764,
"eval_runtime": 897.3741,
"eval_samples_per_second": 32.213,
"eval_steps_per_second": 8.053,
"epoch": 1.9970786758714003,
"step": 4059
},
{
"train_runtime": 62857.4586,
"train_samples_per_second": 8.278,
"train_steps_per_second": 0.065,
"total_flos": 7.055063070229955e18,
"train_loss": 1.8381839976536007,
"epoch": 1.9995387382954841,
"step": 4064
}
]