error577's picture
Training in progress, step 50, checkpoint
37b2614 verified
{
"best_metric": 2.580864906311035,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 0.05,
"eval_steps": 50,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001,
"grad_norm": 0.2811749279499054,
"learning_rate": 2e-05,
"loss": 2.6895,
"step": 1
},
{
"epoch": 0.001,
"eval_loss": 3.0125324726104736,
"eval_runtime": 4.6936,
"eval_samples_per_second": 4.474,
"eval_steps_per_second": 4.474,
"step": 1
},
{
"epoch": 0.002,
"grad_norm": 0.2987586557865143,
"learning_rate": 4e-05,
"loss": 2.8912,
"step": 2
},
{
"epoch": 0.003,
"grad_norm": 0.452608197927475,
"learning_rate": 6e-05,
"loss": 3.4357,
"step": 3
},
{
"epoch": 0.004,
"grad_norm": 0.38785919547080994,
"learning_rate": 8e-05,
"loss": 2.5889,
"step": 4
},
{
"epoch": 0.005,
"grad_norm": 0.38931822776794434,
"learning_rate": 0.0001,
"loss": 2.7513,
"step": 5
},
{
"epoch": 0.006,
"grad_norm": 0.516417384147644,
"learning_rate": 0.00012,
"loss": 3.2128,
"step": 6
},
{
"epoch": 0.007,
"grad_norm": 0.4206741750240326,
"learning_rate": 0.00014,
"loss": 2.9368,
"step": 7
},
{
"epoch": 0.008,
"grad_norm": 0.48171964287757874,
"learning_rate": 0.00016,
"loss": 2.8618,
"step": 8
},
{
"epoch": 0.009,
"grad_norm": 0.8544142842292786,
"learning_rate": 0.00018,
"loss": 3.0312,
"step": 9
},
{
"epoch": 0.01,
"grad_norm": 0.848558247089386,
"learning_rate": 0.0002,
"loss": 2.9334,
"step": 10
},
{
"epoch": 0.011,
"grad_norm": 0.8914313316345215,
"learning_rate": 0.00019999996900269505,
"loss": 2.7981,
"step": 11
},
{
"epoch": 0.012,
"grad_norm": 0.6103464365005493,
"learning_rate": 0.0001999998760107994,
"loss": 2.7247,
"step": 12
},
{
"epoch": 0.013,
"grad_norm": 0.7618600726127625,
"learning_rate": 0.00019999972102437074,
"loss": 2.472,
"step": 13
},
{
"epoch": 0.014,
"grad_norm": 0.6825264692306519,
"learning_rate": 0.00019999950404350512,
"loss": 2.6008,
"step": 14
},
{
"epoch": 0.015,
"grad_norm": 0.5940832495689392,
"learning_rate": 0.00019999922506833704,
"loss": 2.1996,
"step": 15
},
{
"epoch": 0.016,
"grad_norm": 0.6273623108863831,
"learning_rate": 0.00019999888409903948,
"loss": 2.3565,
"step": 16
},
{
"epoch": 0.017,
"grad_norm": 0.7437952160835266,
"learning_rate": 0.00019999848113582384,
"loss": 2.7232,
"step": 17
},
{
"epoch": 0.018,
"grad_norm": 0.5971533060073853,
"learning_rate": 0.0001999980161789399,
"loss": 2.509,
"step": 18
},
{
"epoch": 0.019,
"grad_norm": 0.5190719962120056,
"learning_rate": 0.00019999748922867592,
"loss": 2.3535,
"step": 19
},
{
"epoch": 0.02,
"grad_norm": 0.9244285821914673,
"learning_rate": 0.00019999690028535855,
"loss": 2.7599,
"step": 20
},
{
"epoch": 0.021,
"grad_norm": 0.8340674638748169,
"learning_rate": 0.00019999624934935296,
"loss": 3.0057,
"step": 21
},
{
"epoch": 0.022,
"grad_norm": 1.0633089542388916,
"learning_rate": 0.00019999553642106266,
"loss": 2.2808,
"step": 22
},
{
"epoch": 0.023,
"grad_norm": 4.8767266273498535,
"learning_rate": 0.00019999476150092967,
"loss": 2.8268,
"step": 23
},
{
"epoch": 0.024,
"grad_norm": 2.7197344303131104,
"learning_rate": 0.00019999392458943432,
"loss": 2.6517,
"step": 24
},
{
"epoch": 0.025,
"grad_norm": 0.9329593777656555,
"learning_rate": 0.00019999302568709547,
"loss": 2.212,
"step": 25
},
{
"epoch": 0.026,
"grad_norm": 0.6679103374481201,
"learning_rate": 0.00019999206479447045,
"loss": 2.0117,
"step": 26
},
{
"epoch": 0.027,
"grad_norm": 0.5428286790847778,
"learning_rate": 0.00019999104191215493,
"loss": 2.7582,
"step": 27
},
{
"epoch": 0.028,
"grad_norm": 0.5552177429199219,
"learning_rate": 0.00019998995704078305,
"loss": 2.54,
"step": 28
},
{
"epoch": 0.029,
"grad_norm": 0.5453671216964722,
"learning_rate": 0.00019998881018102737,
"loss": 2.5358,
"step": 29
},
{
"epoch": 0.03,
"grad_norm": 0.47653189301490784,
"learning_rate": 0.00019998760133359885,
"loss": 2.2443,
"step": 30
},
{
"epoch": 0.031,
"grad_norm": 0.755976140499115,
"learning_rate": 0.0001999863304992469,
"loss": 2.5519,
"step": 31
},
{
"epoch": 0.032,
"grad_norm": 0.7680912017822266,
"learning_rate": 0.00019998499767875943,
"loss": 2.7503,
"step": 32
},
{
"epoch": 0.033,
"grad_norm": 3.768080472946167,
"learning_rate": 0.0001999836028729627,
"loss": 2.6051,
"step": 33
},
{
"epoch": 0.034,
"grad_norm": 0.5304062962532043,
"learning_rate": 0.00019998214608272136,
"loss": 2.2065,
"step": 34
},
{
"epoch": 0.035,
"grad_norm": 1.1568998098373413,
"learning_rate": 0.00019998062730893862,
"loss": 2.444,
"step": 35
},
{
"epoch": 0.036,
"grad_norm": 0.8356309533119202,
"learning_rate": 0.000199979046552556,
"loss": 2.5763,
"step": 36
},
{
"epoch": 0.037,
"grad_norm": 0.5210471749305725,
"learning_rate": 0.00019997740381455346,
"loss": 2.8545,
"step": 37
},
{
"epoch": 0.038,
"grad_norm": 1.550714373588562,
"learning_rate": 0.00019997569909594947,
"loss": 2.6236,
"step": 38
},
{
"epoch": 0.039,
"grad_norm": 0.6044741868972778,
"learning_rate": 0.0001999739323978008,
"loss": 2.5349,
"step": 39
},
{
"epoch": 0.04,
"grad_norm": 0.9703565239906311,
"learning_rate": 0.00019997210372120274,
"loss": 3.1004,
"step": 40
},
{
"epoch": 0.041,
"grad_norm": 0.7796650528907776,
"learning_rate": 0.000199970213067289,
"loss": 2.5757,
"step": 41
},
{
"epoch": 0.042,
"grad_norm": 0.6824871301651001,
"learning_rate": 0.00019996826043723162,
"loss": 2.6766,
"step": 42
},
{
"epoch": 0.043,
"grad_norm": 0.8048773407936096,
"learning_rate": 0.00019996624583224114,
"loss": 2.3065,
"step": 43
},
{
"epoch": 0.044,
"grad_norm": 0.5458154082298279,
"learning_rate": 0.00019996416925356652,
"loss": 2.4336,
"step": 44
},
{
"epoch": 0.045,
"grad_norm": 0.623190701007843,
"learning_rate": 0.00019996203070249516,
"loss": 2.3835,
"step": 45
},
{
"epoch": 0.046,
"grad_norm": 0.5928781032562256,
"learning_rate": 0.00019995983018035278,
"loss": 2.3408,
"step": 46
},
{
"epoch": 0.047,
"grad_norm": 0.5790976881980896,
"learning_rate": 0.00019995756768850364,
"loss": 2.3878,
"step": 47
},
{
"epoch": 0.048,
"grad_norm": 0.5648425817489624,
"learning_rate": 0.00019995524322835034,
"loss": 2.2885,
"step": 48
},
{
"epoch": 0.049,
"grad_norm": 0.526339054107666,
"learning_rate": 0.00019995285680133394,
"loss": 2.408,
"step": 49
},
{
"epoch": 0.05,
"grad_norm": 0.6333803534507751,
"learning_rate": 0.00019995040840893388,
"loss": 2.4391,
"step": 50
},
{
"epoch": 0.05,
"eval_loss": 2.580864906311035,
"eval_runtime": 4.8038,
"eval_samples_per_second": 4.372,
"eval_steps_per_second": 4.372,
"step": 50
}
],
"logging_steps": 1,
"max_steps": 4000,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4108715871436800.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}