bbytxt's picture
Training in progress, step 50, checkpoint
f15d881 verified
raw
history blame
10.2 kB
{
"best_metric": 1.7998709678649902,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 0.5698005698005698,
"eval_steps": 25,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011396011396011397,
"grad_norm": 1.2519962787628174,
"learning_rate": 2.9999999999999997e-05,
"loss": 2.4206,
"step": 1
},
{
"epoch": 0.011396011396011397,
"eval_loss": 3.1777703762054443,
"eval_runtime": 2.6753,
"eval_samples_per_second": 18.689,
"eval_steps_per_second": 2.617,
"step": 1
},
{
"epoch": 0.022792022792022793,
"grad_norm": 1.8308161497116089,
"learning_rate": 5.9999999999999995e-05,
"loss": 2.5791,
"step": 2
},
{
"epoch": 0.03418803418803419,
"grad_norm": 2.0269980430603027,
"learning_rate": 8.999999999999999e-05,
"loss": 2.6558,
"step": 3
},
{
"epoch": 0.045584045584045586,
"grad_norm": 2.5735702514648438,
"learning_rate": 0.00011999999999999999,
"loss": 2.8524,
"step": 4
},
{
"epoch": 0.05698005698005698,
"grad_norm": 2.610076904296875,
"learning_rate": 0.00015,
"loss": 2.6163,
"step": 5
},
{
"epoch": 0.06837606837606838,
"grad_norm": 2.135373830795288,
"learning_rate": 0.00017999999999999998,
"loss": 2.5374,
"step": 6
},
{
"epoch": 0.07977207977207977,
"grad_norm": 1.9062364101409912,
"learning_rate": 0.00020999999999999998,
"loss": 2.6469,
"step": 7
},
{
"epoch": 0.09116809116809117,
"grad_norm": 1.4977805614471436,
"learning_rate": 0.00023999999999999998,
"loss": 2.2611,
"step": 8
},
{
"epoch": 0.10256410256410256,
"grad_norm": 1.4389641284942627,
"learning_rate": 0.00027,
"loss": 2.2009,
"step": 9
},
{
"epoch": 0.11396011396011396,
"grad_norm": 1.9085431098937988,
"learning_rate": 0.0003,
"loss": 2.2492,
"step": 10
},
{
"epoch": 0.12535612535612536,
"grad_norm": 1.7611523866653442,
"learning_rate": 0.0002999731384004606,
"loss": 2.2406,
"step": 11
},
{
"epoch": 0.13675213675213677,
"grad_norm": 1.9762187004089355,
"learning_rate": 0.0002998925632224497,
"loss": 2.21,
"step": 12
},
{
"epoch": 0.14814814814814814,
"grad_norm": 1.983385443687439,
"learning_rate": 0.00029975830332434265,
"loss": 1.9138,
"step": 13
},
{
"epoch": 0.15954415954415954,
"grad_norm": 2.5348525047302246,
"learning_rate": 0.00029957040679194776,
"loss": 2.0607,
"step": 14
},
{
"epoch": 0.17094017094017094,
"grad_norm": 1.8772743940353394,
"learning_rate": 0.00029932894092128383,
"loss": 1.7761,
"step": 15
},
{
"epoch": 0.18233618233618235,
"grad_norm": 1.8809691667556763,
"learning_rate": 0.0002990339921944777,
"loss": 1.7878,
"step": 16
},
{
"epoch": 0.19373219373219372,
"grad_norm": 2.102220296859741,
"learning_rate": 0.00029868566624879054,
"loss": 2.2667,
"step": 17
},
{
"epoch": 0.20512820512820512,
"grad_norm": 2.032646894454956,
"learning_rate": 0.00029828408783878324,
"loss": 1.6751,
"step": 18
},
{
"epoch": 0.21652421652421652,
"grad_norm": 2.541229486465454,
"learning_rate": 0.00029782940079163485,
"loss": 1.3936,
"step": 19
},
{
"epoch": 0.22792022792022792,
"grad_norm": 3.479897975921631,
"learning_rate": 0.00029732176795563037,
"loss": 1.7468,
"step": 20
},
{
"epoch": 0.23931623931623933,
"grad_norm": 4.591503143310547,
"learning_rate": 0.0002967613711418359,
"loss": 2.2668,
"step": 21
},
{
"epoch": 0.25071225071225073,
"grad_norm": 7.106602668762207,
"learning_rate": 0.000296148411058982,
"loss": 2.8446,
"step": 22
},
{
"epoch": 0.2621082621082621,
"grad_norm": 4.072912693023682,
"learning_rate": 0.00029548310724157904,
"loss": 2.4782,
"step": 23
},
{
"epoch": 0.27350427350427353,
"grad_norm": 3.0776100158691406,
"learning_rate": 0.0002947656979712899,
"loss": 2.3275,
"step": 24
},
{
"epoch": 0.2849002849002849,
"grad_norm": 1.9854662418365479,
"learning_rate": 0.0002939964401915884,
"loss": 2.2914,
"step": 25
},
{
"epoch": 0.2849002849002849,
"eval_loss": 2.036309242248535,
"eval_runtime": 1.6991,
"eval_samples_per_second": 29.427,
"eval_steps_per_second": 4.12,
"step": 25
},
{
"epoch": 0.2962962962962963,
"grad_norm": 1.2816338539123535,
"learning_rate": 0.0002931756094157332,
"loss": 2.0158,
"step": 26
},
{
"epoch": 0.3076923076923077,
"grad_norm": 1.3689543008804321,
"learning_rate": 0.0002923034996280924,
"loss": 1.9286,
"step": 27
},
{
"epoch": 0.3190883190883191,
"grad_norm": 1.3031697273254395,
"learning_rate": 0.0002913804231788509,
"loss": 2.0447,
"step": 28
},
{
"epoch": 0.33048433048433046,
"grad_norm": 1.2884080410003662,
"learning_rate": 0.00029040671067214087,
"loss": 1.7308,
"step": 29
},
{
"epoch": 0.3418803418803419,
"grad_norm": 1.2418413162231445,
"learning_rate": 0.0002893827108476348,
"loss": 1.7873,
"step": 30
},
{
"epoch": 0.35327635327635326,
"grad_norm": 1.2331762313842773,
"learning_rate": 0.000288308790455642,
"loss": 1.8249,
"step": 31
},
{
"epoch": 0.3646723646723647,
"grad_norm": 1.3798717260360718,
"learning_rate": 0.00028718533412575606,
"loss": 1.6925,
"step": 32
},
{
"epoch": 0.37606837606837606,
"grad_norm": 1.2883918285369873,
"learning_rate": 0.00028601274422909733,
"loss": 1.6585,
"step": 33
},
{
"epoch": 0.38746438746438744,
"grad_norm": 1.4337915182113647,
"learning_rate": 0.00028479144073420234,
"loss": 1.6603,
"step": 34
},
{
"epoch": 0.39886039886039887,
"grad_norm": 1.4459898471832275,
"learning_rate": 0.0002835218610566095,
"loss": 1.811,
"step": 35
},
{
"epoch": 0.41025641025641024,
"grad_norm": 1.5042760372161865,
"learning_rate": 0.0002822044599021973,
"loss": 1.5776,
"step": 36
},
{
"epoch": 0.42165242165242167,
"grad_norm": 1.721915364265442,
"learning_rate": 0.0002808397091043291,
"loss": 1.5148,
"step": 37
},
{
"epoch": 0.43304843304843305,
"grad_norm": 1.7263678312301636,
"learning_rate": 0.00027942809745486343,
"loss": 1.6432,
"step": 38
},
{
"epoch": 0.4444444444444444,
"grad_norm": 1.4137566089630127,
"learning_rate": 0.0002779701305290915,
"loss": 1.1639,
"step": 39
},
{
"epoch": 0.45584045584045585,
"grad_norm": 1.5665677785873413,
"learning_rate": 0.00027646633050466265,
"loss": 1.2632,
"step": 40
},
{
"epoch": 0.4672364672364672,
"grad_norm": 2.0215096473693848,
"learning_rate": 0.0002749172359745641,
"loss": 1.3551,
"step": 41
},
{
"epoch": 0.47863247863247865,
"grad_norm": 4.784286975860596,
"learning_rate": 0.0002733234017542215,
"loss": 2.3304,
"step": 42
},
{
"epoch": 0.49002849002849,
"grad_norm": 4.08688497543335,
"learning_rate": 0.0002716853986827888,
"loss": 2.402,
"step": 43
},
{
"epoch": 0.5014245014245015,
"grad_norm": 2.5830602645874023,
"learning_rate": 0.0002700038134187002,
"loss": 2.3759,
"step": 44
},
{
"epoch": 0.5128205128205128,
"grad_norm": 2.1645848751068115,
"learning_rate": 0.00026827924822955487,
"loss": 2.1305,
"step": 45
},
{
"epoch": 0.5242165242165242,
"grad_norm": 1.5849443674087524,
"learning_rate": 0.0002665123207764128,
"loss": 1.9809,
"step": 46
},
{
"epoch": 0.5356125356125356,
"grad_norm": 1.2589260339736938,
"learning_rate": 0.00026470366389257614,
"loss": 1.706,
"step": 47
},
{
"epoch": 0.5470085470085471,
"grad_norm": 1.0531516075134277,
"learning_rate": 0.0002628539253569372,
"loss": 1.9793,
"step": 48
},
{
"epoch": 0.5584045584045584,
"grad_norm": 1.193375587463379,
"learning_rate": 0.00026096376766197307,
"loss": 1.9775,
"step": 49
},
{
"epoch": 0.5698005698005698,
"grad_norm": 1.1033531427383423,
"learning_rate": 0.00025903386777647154,
"loss": 1.8742,
"step": 50
},
{
"epoch": 0.5698005698005698,
"eval_loss": 1.7998709678649902,
"eval_runtime": 1.7036,
"eval_samples_per_second": 29.349,
"eval_steps_per_second": 4.109,
"step": 50
}
],
"logging_steps": 1,
"max_steps": 176,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 1,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.2522914467282944e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}