{
  "best_metric": 0.4174307584762573,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.04538852578068264,
  "eval_steps": 25,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0009077705156136529,
      "grad_norm": 19.090673446655273,
      "learning_rate": 0.00015,
      "loss": 9.7116,
      "step": 1
    },
    {
      "epoch": 0.0009077705156136529,
      "eval_loss": 9.237911224365234,
      "eval_runtime": 0.5515,
      "eval_samples_per_second": 90.658,
      "eval_steps_per_second": 5.439,
      "step": 1
    },
    {
      "epoch": 0.0018155410312273058,
      "grad_norm": 21.541793823242188,
      "learning_rate": 0.0003,
      "loss": 9.5187,
      "step": 2
    },
    {
      "epoch": 0.0027233115468409588,
      "grad_norm": 16.640724182128906,
      "learning_rate": 0.0002998600959423082,
      "loss": 7.2173,
      "step": 3
    },
    {
      "epoch": 0.0036310820624546117,
      "grad_norm": 172.39108276367188,
      "learning_rate": 0.0002994406737417567,
      "loss": 6.3391,
      "step": 4
    },
    {
      "epoch": 0.004538852578068265,
      "grad_norm": 9.659110069274902,
      "learning_rate": 0.00029874260271490463,
      "loss": 3.7329,
      "step": 5
    },
    {
      "epoch": 0.0054466230936819175,
      "grad_norm": 11.84581470489502,
      "learning_rate": 0.00029776732972055516,
      "loss": 2.0967,
      "step": 6
    },
    {
      "epoch": 0.0063543936092955704,
      "grad_norm": 35.30651092529297,
      "learning_rate": 0.0002965168761609197,
      "loss": 1.4312,
      "step": 7
    },
    {
      "epoch": 0.007262164124909223,
      "grad_norm": 8.859146118164062,
      "learning_rate": 0.0002949938337919529,
      "loss": 0.8472,
      "step": 8
    },
    {
      "epoch": 0.008169934640522876,
      "grad_norm": 9.34030532836914,
      "learning_rate": 0.0002932013593515431,
      "loss": 0.8184,
      "step": 9
    },
    {
      "epoch": 0.00907770515613653,
      "grad_norm": 7.431899070739746,
      "learning_rate": 0.00029114316801669057,
      "loss": 0.7907,
      "step": 10
    },
    {
      "epoch": 0.009985475671750182,
      "grad_norm": 8.734899520874023,
      "learning_rate": 0.00028882352570323616,
      "loss": 0.7096,
      "step": 11
    },
    {
      "epoch": 0.010893246187363835,
      "grad_norm": 8.801371574401855,
      "learning_rate": 0.00028624724022409897,
      "loss": 0.8588,
      "step": 12
    },
    {
      "epoch": 0.011801016702977488,
      "grad_norm": 7.757359981536865,
      "learning_rate": 0.0002834196513243502,
      "loss": 0.8325,
      "step": 13
    },
    {
      "epoch": 0.012708787218591141,
      "grad_norm": 5.052784442901611,
      "learning_rate": 0.0002803466196137759,
      "loss": 0.5601,
      "step": 14
    },
    {
      "epoch": 0.013616557734204794,
      "grad_norm": 3.7386362552642822,
      "learning_rate": 0.00027703451441986836,
      "loss": 0.5656,
      "step": 15
    },
    {
      "epoch": 0.014524328249818447,
      "grad_norm": 4.285150527954102,
      "learning_rate": 0.000273490200586422,
      "loss": 0.5889,
      "step": 16
    },
    {
      "epoch": 0.015432098765432098,
      "grad_norm": 1.7297357320785522,
      "learning_rate": 0.00026972102424509665,
      "loss": 0.5514,
      "step": 17
    },
    {
      "epoch": 0.016339869281045753,
      "grad_norm": 4.028224468231201,
      "learning_rate": 0.00026573479758943753,
      "loss": 0.622,
      "step": 18
    },
    {
      "epoch": 0.017247639796659404,
      "grad_norm": 3.1785671710968018,
      "learning_rate": 0.0002615397826829114,
      "loss": 0.6005,
      "step": 19
    },
    {
      "epoch": 0.01815541031227306,
      "grad_norm": 2.6770856380462646,
      "learning_rate": 0.0002571446743345183,
      "loss": 0.5465,
      "step": 20
    },
    {
      "epoch": 0.01906318082788671,
      "grad_norm": 1.3410382270812988,
      "learning_rate": 0.00025255858207747205,
      "loss": 0.4898,
      "step": 21
    },
    {
      "epoch": 0.019970951343500364,
      "grad_norm": 6.422758102416992,
      "learning_rate": 0.0002477910112883017,
      "loss": 0.6803,
      "step": 22
    },
    {
      "epoch": 0.020878721859114015,
      "grad_norm": 4.62112283706665,
      "learning_rate": 0.00024285184348550706,
      "loss": 0.5983,
      "step": 23
    },
    {
      "epoch": 0.02178649237472767,
      "grad_norm": 1.9435359239578247,
      "learning_rate": 0.0002377513158486027,
      "loss": 0.5102,
      "step": 24
    },
    {
      "epoch": 0.02269426289034132,
      "grad_norm": 1.9337120056152344,
      "learning_rate": 0.00023249999999999999,
      "loss": 0.5099,
      "step": 25
    },
    {
      "epoch": 0.02269426289034132,
      "eval_loss": 0.6093809008598328,
      "eval_runtime": 0.5517,
      "eval_samples_per_second": 90.623,
      "eval_steps_per_second": 5.437,
      "step": 25
    },
    {
      "epoch": 0.023602033405954976,
      "grad_norm": 4.949934959411621,
      "learning_rate": 0.00022710878009370554,
      "loss": 0.6187,
      "step": 26
    },
    {
      "epoch": 0.024509803921568627,
      "grad_norm": 3.945021390914917,
      "learning_rate": 0.00022158883025624965,
      "loss": 0.5797,
      "step": 27
    },
    {
      "epoch": 0.025417574437182282,
      "grad_norm": 2.5016119480133057,
      "learning_rate": 0.0002159515914266029,
      "loss": 0.5343,
      "step": 28
    },
    {
      "epoch": 0.026325344952795933,
      "grad_norm": 1.574798345565796,
      "learning_rate": 0.0002102087476430831,
      "loss": 0.5074,
      "step": 29
    },
    {
      "epoch": 0.027233115468409588,
      "grad_norm": 1.7340073585510254,
      "learning_rate": 0.00020437220182640135,
      "loss": 0.4987,
      "step": 30
    },
    {
      "epoch": 0.02814088598402324,
      "grad_norm": 1.8289647102355957,
      "learning_rate": 0.00019845405110904146,
      "loss": 0.522,
      "step": 31
    },
    {
      "epoch": 0.029048656499636893,
      "grad_norm": 1.8377400636672974,
      "learning_rate": 0.00019246656176210558,
      "loss": 0.4699,
      "step": 32
    },
    {
      "epoch": 0.029956427015250545,
      "grad_norm": 2.042335271835327,
      "learning_rate": 0.0001864221437715939,
      "loss": 0.4408,
      "step": 33
    },
    {
      "epoch": 0.030864197530864196,
      "grad_norm": 2.779773712158203,
      "learning_rate": 0.0001803333251168141,
      "loss": 0.5595,
      "step": 34
    },
    {
      "epoch": 0.03177196804647785,
      "grad_norm": 2.2879557609558105,
      "learning_rate": 0.00017421272580423058,
      "loss": 0.5574,
      "step": 35
    },
    {
      "epoch": 0.032679738562091505,
      "grad_norm": 2.1511833667755127,
      "learning_rate": 0.00016807303171057425,
      "loss": 0.5359,
      "step": 36
    },
    {
      "epoch": 0.033587509077705156,
      "grad_norm": 1.0012569427490234,
      "learning_rate": 0.00016192696828942573,
      "loss": 0.4326,
      "step": 37
    },
    {
      "epoch": 0.03449527959331881,
      "grad_norm": 2.745234489440918,
      "learning_rate": 0.00015578727419576942,
      "loss": 0.5126,
      "step": 38
    },
    {
      "epoch": 0.03540305010893246,
      "grad_norm": 2.2056078910827637,
      "learning_rate": 0.00014966667488318586,
      "loss": 0.4623,
      "step": 39
    },
    {
      "epoch": 0.03631082062454612,
      "grad_norm": 1.4716527462005615,
      "learning_rate": 0.00014357785622840606,
      "loss": 0.4342,
      "step": 40
    },
    {
      "epoch": 0.03721859114015977,
      "grad_norm": 1.3762019872665405,
      "learning_rate": 0.00013753343823789445,
      "loss": 0.4361,
      "step": 41
    },
    {
      "epoch": 0.03812636165577342,
      "grad_norm": 2.7313358783721924,
      "learning_rate": 0.00013154594889095854,
      "loss": 0.4984,
      "step": 42
    },
    {
      "epoch": 0.03903413217138707,
      "grad_norm": 2.4179422855377197,
      "learning_rate": 0.00012562779817359865,
      "loss": 0.4451,
      "step": 43
    },
    {
      "epoch": 0.03994190268700073,
      "grad_norm": 2.6130852699279785,
      "learning_rate": 0.00011979125235691685,
      "loss": 0.5173,
      "step": 44
    },
    {
      "epoch": 0.04084967320261438,
      "grad_norm": 2.8871238231658936,
      "learning_rate": 0.00011404840857339706,
      "loss": 0.4606,
      "step": 45
    },
    {
      "epoch": 0.04175744371822803,
      "grad_norm": 2.950117826461792,
      "learning_rate": 0.0001084111697437504,
      "loss": 0.4586,
      "step": 46
    },
    {
      "epoch": 0.04266521423384168,
      "grad_norm": 0.984398365020752,
      "learning_rate": 0.00010289121990629447,
      "loss": 0.4926,
      "step": 47
    },
    {
      "epoch": 0.04357298474945534,
      "grad_norm": 2.0302248001098633,
      "learning_rate": 9.750000000000003e-05,
      "loss": 0.4309,
      "step": 48
    },
    {
      "epoch": 0.04448075526506899,
      "grad_norm": 1.1805931329727173,
      "learning_rate": 9.22486841513973e-05,
      "loss": 0.4593,
      "step": 49
    },
    {
      "epoch": 0.04538852578068264,
      "grad_norm": 1.6091700792312622,
      "learning_rate": 8.714815651449293e-05,
      "loss": 0.4697,
      "step": 50
    },
    {
      "epoch": 0.04538852578068264,
      "eval_loss": 0.4174307584762573,
      "eval_runtime": 0.5516,
      "eval_samples_per_second": 90.651,
      "eval_steps_per_second": 5.439,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 71,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 7.81327834021888e+16,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}