{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.0041376746591671315,
"eval_steps": 500,
"global_step": 64,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 6.465116654948643e-05,
"grad_norm": 44.632102966308594,
"learning_rate": 6.451612903225807e-08,
"loss": 1.8854,
"step": 1
},
{
"epoch": 0.00012930233309897286,
"grad_norm": 46.18963623046875,
"learning_rate": 1.2903225806451614e-07,
"loss": 1.7711,
"step": 2
},
{
"epoch": 0.0001939534996484593,
"grad_norm": 45.15989685058594,
"learning_rate": 1.935483870967742e-07,
"loss": 1.9258,
"step": 3
},
{
"epoch": 0.0002586046661979457,
"grad_norm": 48.17970275878906,
"learning_rate": 2.580645161290323e-07,
"loss": 1.9478,
"step": 4
},
{
"epoch": 0.00032325583274743215,
"grad_norm": 46.92567443847656,
"learning_rate": 3.2258064516129035e-07,
"loss": 1.8888,
"step": 5
},
{
"epoch": 0.0003879069992969186,
"grad_norm": 32.95823287963867,
"learning_rate": 3.870967741935484e-07,
"loss": 1.877,
"step": 6
},
{
"epoch": 0.000452558165846405,
"grad_norm": 25.36848258972168,
"learning_rate": 4.5161290322580644e-07,
"loss": 1.7888,
"step": 7
},
{
"epoch": 0.0005172093323958914,
"grad_norm": 31.360990524291992,
"learning_rate": 5.161290322580646e-07,
"loss": 1.8526,
"step": 8
},
{
"epoch": 0.0005818604989453779,
"grad_norm": 23.622682571411133,
"learning_rate": 5.806451612903227e-07,
"loss": 1.8157,
"step": 9
},
{
"epoch": 0.0006465116654948643,
"grad_norm": 22.565826416015625,
"learning_rate": 6.451612903225807e-07,
"loss": 1.7774,
"step": 10
},
{
"epoch": 0.0007111628320443507,
"grad_norm": 22.03154945373535,
"learning_rate": 7.096774193548388e-07,
"loss": 1.7796,
"step": 11
},
{
"epoch": 0.0007758139985938372,
"grad_norm": 22.295787811279297,
"learning_rate": 7.741935483870968e-07,
"loss": 1.7066,
"step": 12
},
{
"epoch": 0.0008404651651433236,
"grad_norm": 18.35325050354004,
"learning_rate": 8.38709677419355e-07,
"loss": 1.7261,
"step": 13
},
{
"epoch": 0.00090511633169281,
"grad_norm": 20.82447052001953,
"learning_rate": 9.032258064516129e-07,
"loss": 1.8172,
"step": 14
},
{
"epoch": 0.0009697674982422964,
"grad_norm": 10.829120635986328,
"learning_rate": 9.67741935483871e-07,
"loss": 1.7402,
"step": 15
},
{
"epoch": 0.0010344186647917829,
"grad_norm": 31.75849723815918,
"learning_rate": 1.0322580645161291e-06,
"loss": 1.618,
"step": 16
},
{
"epoch": 0.0010990698313412693,
"grad_norm": 27.484895706176758,
"learning_rate": 1.0967741935483872e-06,
"loss": 1.5973,
"step": 17
},
{
"epoch": 0.0011637209978907557,
"grad_norm": 20.07984161376953,
"learning_rate": 1.1612903225806454e-06,
"loss": 1.6907,
"step": 18
},
{
"epoch": 0.0012283721644402422,
"grad_norm": 14.788183212280273,
"learning_rate": 1.2258064516129033e-06,
"loss": 1.6986,
"step": 19
},
{
"epoch": 0.0012930233309897286,
"grad_norm": 9.38936996459961,
"learning_rate": 1.2903225806451614e-06,
"loss": 1.6164,
"step": 20
},
{
"epoch": 0.001357674497539215,
"grad_norm": 8.975388526916504,
"learning_rate": 1.3548387096774195e-06,
"loss": 1.5637,
"step": 21
},
{
"epoch": 0.0014223256640887015,
"grad_norm": 14.67812442779541,
"learning_rate": 1.4193548387096776e-06,
"loss": 1.6869,
"step": 22
},
{
"epoch": 0.0014869768306381879,
"grad_norm": 8.56955337524414,
"learning_rate": 1.4838709677419356e-06,
"loss": 1.5948,
"step": 23
},
{
"epoch": 0.0015516279971876743,
"grad_norm": 5.918207168579102,
"learning_rate": 1.5483870967741937e-06,
"loss": 1.61,
"step": 24
},
{
"epoch": 0.0016162791637371607,
"grad_norm": 8.165249824523926,
"learning_rate": 1.6129032258064516e-06,
"loss": 1.712,
"step": 25
},
{
"epoch": 0.0016809303302866472,
"grad_norm": 9.301441192626953,
"learning_rate": 1.67741935483871e-06,
"loss": 1.6957,
"step": 26
},
{
"epoch": 0.0017455814968361336,
"grad_norm": 7.351085662841797,
"learning_rate": 1.7419354838709678e-06,
"loss": 1.5153,
"step": 27
},
{
"epoch": 0.00181023266338562,
"grad_norm": 7.347102642059326,
"learning_rate": 1.8064516129032258e-06,
"loss": 1.6625,
"step": 28
},
{
"epoch": 0.0018748838299351065,
"grad_norm": 7.22580623626709,
"learning_rate": 1.870967741935484e-06,
"loss": 1.5632,
"step": 29
},
{
"epoch": 0.001939534996484593,
"grad_norm": 5.857529163360596,
"learning_rate": 1.935483870967742e-06,
"loss": 1.5339,
"step": 30
},
{
"epoch": 0.002004186163034079,
"grad_norm": 7.3882222175598145,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.5775,
"step": 31
},
{
"epoch": 0.0020688373295835658,
"grad_norm": 6.075866222381592,
"learning_rate": 2.0645161290322582e-06,
"loss": 1.55,
"step": 32
},
{
"epoch": 0.002133488496133052,
"grad_norm": 4.930643081665039,
"learning_rate": 2.129032258064516e-06,
"loss": 1.5525,
"step": 33
},
{
"epoch": 0.0021981396626825386,
"grad_norm": 6.426210880279541,
"learning_rate": 2.1935483870967745e-06,
"loss": 1.5845,
"step": 34
},
{
"epoch": 0.002262790829232025,
"grad_norm": 7.546030521392822,
"learning_rate": 2.2580645161290324e-06,
"loss": 1.6313,
"step": 35
},
{
"epoch": 0.0023274419957815115,
"grad_norm": 4.881446361541748,
"learning_rate": 2.3225806451612907e-06,
"loss": 1.5387,
"step": 36
},
{
"epoch": 0.0023920931623309977,
"grad_norm": 5.156312942504883,
"learning_rate": 2.3870967741935486e-06,
"loss": 1.4912,
"step": 37
},
{
"epoch": 0.0024567443288804843,
"grad_norm": 6.470444202423096,
"learning_rate": 2.4516129032258066e-06,
"loss": 1.4706,
"step": 38
},
{
"epoch": 0.0025213954954299705,
"grad_norm": 15.289785385131836,
"learning_rate": 2.5161290322580645e-06,
"loss": 1.4612,
"step": 39
},
{
"epoch": 0.002586046661979457,
"grad_norm": 6.442019939422607,
"learning_rate": 2.580645161290323e-06,
"loss": 1.5178,
"step": 40
},
{
"epoch": 0.0026506978285289434,
"grad_norm": 5.88471794128418,
"learning_rate": 2.645161290322581e-06,
"loss": 1.5095,
"step": 41
},
{
"epoch": 0.00271534899507843,
"grad_norm": 7.326111316680908,
"learning_rate": 2.709677419354839e-06,
"loss": 1.5718,
"step": 42
},
{
"epoch": 0.0027800001616279163,
"grad_norm": 4.946439266204834,
"learning_rate": 2.774193548387097e-06,
"loss": 1.5057,
"step": 43
},
{
"epoch": 0.002844651328177403,
"grad_norm": 5.956087589263916,
"learning_rate": 2.8387096774193553e-06,
"loss": 1.4635,
"step": 44
},
{
"epoch": 0.002909302494726889,
"grad_norm": 7.258240222930908,
"learning_rate": 2.903225806451613e-06,
"loss": 1.5494,
"step": 45
},
{
"epoch": 0.0029739536612763758,
"grad_norm": 5.20070743560791,
"learning_rate": 2.967741935483871e-06,
"loss": 1.5329,
"step": 46
},
{
"epoch": 0.003038604827825862,
"grad_norm": 6.215978145599365,
"learning_rate": 3.0322580645161295e-06,
"loss": 1.4822,
"step": 47
},
{
"epoch": 0.0031032559943753486,
"grad_norm": 4.830237865447998,
"learning_rate": 3.0967741935483874e-06,
"loss": 1.457,
"step": 48
},
{
"epoch": 0.003167907160924835,
"grad_norm": 6.3563971519470215,
"learning_rate": 3.1612903225806453e-06,
"loss": 1.4619,
"step": 49
},
{
"epoch": 0.0032325583274743215,
"grad_norm": 5.210926532745361,
"learning_rate": 3.225806451612903e-06,
"loss": 1.446,
"step": 50
},
{
"epoch": 0.0032972094940238077,
"grad_norm": 5.412484169006348,
"learning_rate": 3.2903225806451615e-06,
"loss": 1.4154,
"step": 51
},
{
"epoch": 0.0033618606605732944,
"grad_norm": 5.500335693359375,
"learning_rate": 3.35483870967742e-06,
"loss": 1.4902,
"step": 52
},
{
"epoch": 0.0034265118271227806,
"grad_norm": 5.576430320739746,
"learning_rate": 3.4193548387096773e-06,
"loss": 1.4931,
"step": 53
},
{
"epoch": 0.003491162993672267,
"grad_norm": 4.996302604675293,
"learning_rate": 3.4838709677419357e-06,
"loss": 1.374,
"step": 54
},
{
"epoch": 0.0035558141602217534,
"grad_norm": 4.846431255340576,
"learning_rate": 3.548387096774194e-06,
"loss": 1.5605,
"step": 55
},
{
"epoch": 0.00362046532677124,
"grad_norm": 5.039584636688232,
"learning_rate": 3.6129032258064515e-06,
"loss": 1.4703,
"step": 56
},
{
"epoch": 0.0036851164933207263,
"grad_norm": 4.541802406311035,
"learning_rate": 3.67741935483871e-06,
"loss": 1.4888,
"step": 57
},
{
"epoch": 0.003749767659870213,
"grad_norm": 4.2027997970581055,
"learning_rate": 3.741935483870968e-06,
"loss": 1.5091,
"step": 58
},
{
"epoch": 0.003814418826419699,
"grad_norm": 5.916923522949219,
"learning_rate": 3.8064516129032257e-06,
"loss": 1.4113,
"step": 59
},
{
"epoch": 0.003879069992969186,
"grad_norm": 4.324679374694824,
"learning_rate": 3.870967741935484e-06,
"loss": 1.5304,
"step": 60
},
{
"epoch": 0.0039437211595186724,
"grad_norm": 4.319639205932617,
"learning_rate": 3.935483870967742e-06,
"loss": 1.4203,
"step": 61
},
{
"epoch": 0.004008372326068158,
"grad_norm": 5.848201274871826,
"learning_rate": 4.000000000000001e-06,
"loss": 1.4394,
"step": 62
},
{
"epoch": 0.004073023492617645,
"grad_norm": 5.865200042724609,
"learning_rate": 4.064516129032259e-06,
"loss": 1.4537,
"step": 63
},
{
"epoch": 0.0041376746591671315,
"grad_norm": 4.825560569763184,
"learning_rate": 4.1290322580645165e-06,
"loss": 1.4732,
"step": 64
}
],
"logging_steps": 1,
"max_steps": 15467,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 16,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6.80601377625047e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}