dixedus's picture
Training in progress, step 99, checkpoint
991384a verified
raw
history blame
9.05 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.3878550440744368,
"eval_steps": 9,
"global_step": 99,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0039177277179236044,
"eval_loss": 10.374105453491211,
"eval_runtime": 0.767,
"eval_samples_per_second": 560.637,
"eval_steps_per_second": 70.406,
"step": 1
},
{
"epoch": 0.011753183153770812,
"grad_norm": 0.04419707879424095,
"learning_rate": 3e-05,
"loss": 10.3757,
"step": 3
},
{
"epoch": 0.023506366307541625,
"grad_norm": 0.03822525963187218,
"learning_rate": 6e-05,
"loss": 10.3745,
"step": 6
},
{
"epoch": 0.03525954946131244,
"grad_norm": 0.03418227285146713,
"learning_rate": 9e-05,
"loss": 10.3751,
"step": 9
},
{
"epoch": 0.03525954946131244,
"eval_loss": 10.373617172241211,
"eval_runtime": 0.7461,
"eval_samples_per_second": 576.299,
"eval_steps_per_second": 72.372,
"step": 9
},
{
"epoch": 0.04701273261508325,
"grad_norm": 0.04289775341749191,
"learning_rate": 9.987820251299122e-05,
"loss": 10.3756,
"step": 12
},
{
"epoch": 0.058765915768854066,
"grad_norm": 0.03956589475274086,
"learning_rate": 9.924038765061042e-05,
"loss": 10.3729,
"step": 15
},
{
"epoch": 0.07051909892262488,
"grad_norm": 0.0488935187458992,
"learning_rate": 9.806308479691595e-05,
"loss": 10.3732,
"step": 18
},
{
"epoch": 0.07051909892262488,
"eval_loss": 10.37240982055664,
"eval_runtime": 0.7837,
"eval_samples_per_second": 548.666,
"eval_steps_per_second": 68.902,
"step": 18
},
{
"epoch": 0.08227228207639568,
"grad_norm": 0.033634647727012634,
"learning_rate": 9.635919272833938e-05,
"loss": 10.3756,
"step": 21
},
{
"epoch": 0.0940254652301665,
"grad_norm": 0.05794714018702507,
"learning_rate": 9.414737964294636e-05,
"loss": 10.373,
"step": 24
},
{
"epoch": 0.10577864838393732,
"grad_norm": 0.035457514226436615,
"learning_rate": 9.145187862775209e-05,
"loss": 10.3694,
"step": 27
},
{
"epoch": 0.10577864838393732,
"eval_loss": 10.371187210083008,
"eval_runtime": 0.7714,
"eval_samples_per_second": 557.405,
"eval_steps_per_second": 70.0,
"step": 27
},
{
"epoch": 0.11753183153770813,
"grad_norm": 0.04388291388750076,
"learning_rate": 8.83022221559489e-05,
"loss": 10.3715,
"step": 30
},
{
"epoch": 0.12928501469147893,
"grad_norm": 0.04818139225244522,
"learning_rate": 8.473291852294987e-05,
"loss": 10.3729,
"step": 33
},
{
"epoch": 0.14103819784524976,
"grad_norm": 0.04022936895489693,
"learning_rate": 8.07830737662829e-05,
"loss": 10.3716,
"step": 36
},
{
"epoch": 0.14103819784524976,
"eval_loss": 10.369961738586426,
"eval_runtime": 0.7491,
"eval_samples_per_second": 574.0,
"eval_steps_per_second": 72.084,
"step": 36
},
{
"epoch": 0.15279138099902057,
"grad_norm": 0.05035528540611267,
"learning_rate": 7.649596321166024e-05,
"loss": 10.3703,
"step": 39
},
{
"epoch": 0.16454456415279137,
"grad_norm": 0.04766154661774635,
"learning_rate": 7.191855733945387e-05,
"loss": 10.3692,
"step": 42
},
{
"epoch": 0.1762977473065622,
"grad_norm": 0.03941637650132179,
"learning_rate": 6.710100716628344e-05,
"loss": 10.3696,
"step": 45
},
{
"epoch": 0.1762977473065622,
"eval_loss": 10.368770599365234,
"eval_runtime": 0.784,
"eval_samples_per_second": 548.49,
"eval_steps_per_second": 68.88,
"step": 45
},
{
"epoch": 0.188050930460333,
"grad_norm": 0.03891783580183983,
"learning_rate": 6.209609477998338e-05,
"loss": 10.3699,
"step": 48
},
{
"epoch": 0.19980411361410383,
"grad_norm": 0.04591111093759537,
"learning_rate": 5.695865504800327e-05,
"loss": 10.3683,
"step": 51
},
{
"epoch": 0.21155729676787463,
"grad_norm": 0.06579438596963882,
"learning_rate": 5.174497483512506e-05,
"loss": 10.3693,
"step": 54
},
{
"epoch": 0.21155729676787463,
"eval_loss": 10.367687225341797,
"eval_runtime": 0.7705,
"eval_samples_per_second": 558.084,
"eval_steps_per_second": 70.085,
"step": 54
},
{
"epoch": 0.22331047992164543,
"grad_norm": 0.048035670071840286,
"learning_rate": 4.6512176312793736e-05,
"loss": 10.3687,
"step": 57
},
{
"epoch": 0.23506366307541626,
"grad_norm": 0.046677302569150925,
"learning_rate": 4.131759111665349e-05,
"loss": 10.3667,
"step": 60
},
{
"epoch": 0.24681684622918706,
"grad_norm": 0.04819753021001816,
"learning_rate": 3.6218132209150045e-05,
"loss": 10.3666,
"step": 63
},
{
"epoch": 0.24681684622918706,
"eval_loss": 10.366778373718262,
"eval_runtime": 0.7477,
"eval_samples_per_second": 575.108,
"eval_steps_per_second": 72.223,
"step": 63
},
{
"epoch": 0.25857002938295787,
"grad_norm": 0.05676067993044853,
"learning_rate": 3.12696703292044e-05,
"loss": 10.3669,
"step": 66
},
{
"epoch": 0.2703232125367287,
"grad_norm": 0.05819647014141083,
"learning_rate": 2.6526421860705473e-05,
"loss": 10.3661,
"step": 69
},
{
"epoch": 0.2820763956904995,
"grad_norm": 0.056551020592451096,
"learning_rate": 2.2040354826462668e-05,
"loss": 10.3681,
"step": 72
},
{
"epoch": 0.2820763956904995,
"eval_loss": 10.366111755371094,
"eval_runtime": 0.7821,
"eval_samples_per_second": 549.813,
"eval_steps_per_second": 69.046,
"step": 72
},
{
"epoch": 0.2938295788442703,
"grad_norm": 0.06585326790809631,
"learning_rate": 1.7860619515673033e-05,
"loss": 10.3683,
"step": 75
},
{
"epoch": 0.30558276199804113,
"grad_norm": 0.04930694028735161,
"learning_rate": 1.4033009983067452e-05,
"loss": 10.365,
"step": 78
},
{
"epoch": 0.31733594515181196,
"grad_norm": 0.06866071373224258,
"learning_rate": 1.0599462319663905e-05,
"loss": 10.367,
"step": 81
},
{
"epoch": 0.31733594515181196,
"eval_loss": 10.365713119506836,
"eval_runtime": 0.7481,
"eval_samples_per_second": 574.788,
"eval_steps_per_second": 72.183,
"step": 81
},
{
"epoch": 0.32908912830558273,
"grad_norm": 0.04585767164826393,
"learning_rate": 7.597595192178702e-06,
"loss": 10.3658,
"step": 84
},
{
"epoch": 0.34084231145935356,
"grad_norm": 0.05678807944059372,
"learning_rate": 5.060297685041659e-06,
"loss": 10.367,
"step": 87
},
{
"epoch": 0.3525954946131244,
"grad_norm": 0.056517504155635834,
"learning_rate": 3.0153689607045845e-06,
"loss": 10.3663,
"step": 90
},
{
"epoch": 0.3525954946131244,
"eval_loss": 10.365554809570312,
"eval_runtime": 0.754,
"eval_samples_per_second": 570.3,
"eval_steps_per_second": 71.619,
"step": 90
},
{
"epoch": 0.3643486777668952,
"grad_norm": 0.05382026731967926,
"learning_rate": 1.4852136862001764e-06,
"loss": 10.366,
"step": 93
},
{
"epoch": 0.376101860920666,
"grad_norm": 0.05993453785777092,
"learning_rate": 4.865965629214819e-07,
"loss": 10.3649,
"step": 96
},
{
"epoch": 0.3878550440744368,
"grad_norm": 0.05342447757720947,
"learning_rate": 3.04586490452119e-08,
"loss": 10.3646,
"step": 99
},
{
"epoch": 0.3878550440744368,
"eval_loss": 10.365524291992188,
"eval_runtime": 0.7745,
"eval_samples_per_second": 555.185,
"eval_steps_per_second": 69.721,
"step": 99
}
],
"logging_steps": 3,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 9,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 10355884425216.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}