{ "best_metric": 0.4174307584762573, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.04538852578068264, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009077705156136529, "grad_norm": 19.090673446655273, "learning_rate": 0.00015, "loss": 9.7116, "step": 1 }, { "epoch": 0.0009077705156136529, "eval_loss": 9.237911224365234, "eval_runtime": 0.5515, "eval_samples_per_second": 90.658, "eval_steps_per_second": 5.439, "step": 1 }, { "epoch": 0.0018155410312273058, "grad_norm": 21.541793823242188, "learning_rate": 0.0003, "loss": 9.5187, "step": 2 }, { "epoch": 0.0027233115468409588, "grad_norm": 16.640724182128906, "learning_rate": 0.0002998600959423082, "loss": 7.2173, "step": 3 }, { "epoch": 0.0036310820624546117, "grad_norm": 172.39108276367188, "learning_rate": 0.0002994406737417567, "loss": 6.3391, "step": 4 }, { "epoch": 0.004538852578068265, "grad_norm": 9.659110069274902, "learning_rate": 0.00029874260271490463, "loss": 3.7329, "step": 5 }, { "epoch": 0.0054466230936819175, "grad_norm": 11.84581470489502, "learning_rate": 0.00029776732972055516, "loss": 2.0967, "step": 6 }, { "epoch": 0.0063543936092955704, "grad_norm": 35.30651092529297, "learning_rate": 0.0002965168761609197, "loss": 1.4312, "step": 7 }, { "epoch": 0.007262164124909223, "grad_norm": 8.859146118164062, "learning_rate": 0.0002949938337919529, "loss": 0.8472, "step": 8 }, { "epoch": 0.008169934640522876, "grad_norm": 9.34030532836914, "learning_rate": 0.0002932013593515431, "loss": 0.8184, "step": 9 }, { "epoch": 0.00907770515613653, "grad_norm": 7.431899070739746, "learning_rate": 0.00029114316801669057, "loss": 0.7907, "step": 10 }, { "epoch": 0.009985475671750182, "grad_norm": 8.734899520874023, "learning_rate": 0.00028882352570323616, "loss": 0.7096, "step": 11 }, { "epoch": 0.010893246187363835, "grad_norm": 8.801371574401855, "learning_rate": 0.00028624724022409897, "loss": 0.8588, "step": 12 }, { "epoch": 0.011801016702977488, "grad_norm": 7.757359981536865, "learning_rate": 0.0002834196513243502, "loss": 0.8325, "step": 13 }, { "epoch": 0.012708787218591141, "grad_norm": 5.052784442901611, "learning_rate": 0.0002803466196137759, "loss": 0.5601, "step": 14 }, { "epoch": 0.013616557734204794, "grad_norm": 3.7386362552642822, "learning_rate": 0.00027703451441986836, "loss": 0.5656, "step": 15 }, { "epoch": 0.014524328249818447, "grad_norm": 4.285150527954102, "learning_rate": 0.000273490200586422, "loss": 0.5889, "step": 16 }, { "epoch": 0.015432098765432098, "grad_norm": 1.7297357320785522, "learning_rate": 0.00026972102424509665, "loss": 0.5514, "step": 17 }, { "epoch": 0.016339869281045753, "grad_norm": 4.028224468231201, "learning_rate": 0.00026573479758943753, "loss": 0.622, "step": 18 }, { "epoch": 0.017247639796659404, "grad_norm": 3.1785671710968018, "learning_rate": 0.0002615397826829114, "loss": 0.6005, "step": 19 }, { "epoch": 0.01815541031227306, "grad_norm": 2.6770856380462646, "learning_rate": 0.0002571446743345183, "loss": 0.5465, "step": 20 }, { "epoch": 0.01906318082788671, "grad_norm": 1.3410382270812988, "learning_rate": 0.00025255858207747205, "loss": 0.4898, "step": 21 }, { "epoch": 0.019970951343500364, "grad_norm": 6.422758102416992, "learning_rate": 0.0002477910112883017, "loss": 0.6803, "step": 22 }, { "epoch": 0.020878721859114015, "grad_norm": 4.62112283706665, "learning_rate": 0.00024285184348550706, "loss": 0.5983, "step": 23 }, { "epoch": 0.02178649237472767, "grad_norm": 1.9435359239578247, "learning_rate": 0.0002377513158486027, "loss": 0.5102, "step": 24 }, { "epoch": 0.02269426289034132, "grad_norm": 1.9337120056152344, "learning_rate": 0.00023249999999999999, "loss": 0.5099, "step": 25 }, { "epoch": 0.02269426289034132, "eval_loss": 0.6093809008598328, "eval_runtime": 0.5517, "eval_samples_per_second": 90.623, "eval_steps_per_second": 5.437, "step": 25 }, { "epoch": 0.023602033405954976, "grad_norm": 4.949934959411621, "learning_rate": 0.00022710878009370554, "loss": 0.6187, "step": 26 }, { "epoch": 0.024509803921568627, "grad_norm": 3.945021390914917, "learning_rate": 0.00022158883025624965, "loss": 0.5797, "step": 27 }, { "epoch": 0.025417574437182282, "grad_norm": 2.5016119480133057, "learning_rate": 0.0002159515914266029, "loss": 0.5343, "step": 28 }, { "epoch": 0.026325344952795933, "grad_norm": 1.574798345565796, "learning_rate": 0.0002102087476430831, "loss": 0.5074, "step": 29 }, { "epoch": 0.027233115468409588, "grad_norm": 1.7340073585510254, "learning_rate": 0.00020437220182640135, "loss": 0.4987, "step": 30 }, { "epoch": 0.02814088598402324, "grad_norm": 1.8289647102355957, "learning_rate": 0.00019845405110904146, "loss": 0.522, "step": 31 }, { "epoch": 0.029048656499636893, "grad_norm": 1.8377400636672974, "learning_rate": 0.00019246656176210558, "loss": 0.4699, "step": 32 }, { "epoch": 0.029956427015250545, "grad_norm": 2.042335271835327, "learning_rate": 0.0001864221437715939, "loss": 0.4408, "step": 33 }, { "epoch": 0.030864197530864196, "grad_norm": 2.779773712158203, "learning_rate": 0.0001803333251168141, "loss": 0.5595, "step": 34 }, { "epoch": 0.03177196804647785, "grad_norm": 2.2879557609558105, "learning_rate": 0.00017421272580423058, "loss": 0.5574, "step": 35 }, { "epoch": 0.032679738562091505, "grad_norm": 2.1511833667755127, "learning_rate": 0.00016807303171057425, "loss": 0.5359, "step": 36 }, { "epoch": 0.033587509077705156, "grad_norm": 1.0012569427490234, "learning_rate": 0.00016192696828942573, "loss": 0.4326, "step": 37 }, { "epoch": 0.03449527959331881, "grad_norm": 2.745234489440918, "learning_rate": 0.00015578727419576942, "loss": 0.5126, "step": 38 }, { "epoch": 0.03540305010893246, "grad_norm": 2.2056078910827637, "learning_rate": 0.00014966667488318586, "loss": 0.4623, "step": 39 }, { "epoch": 0.03631082062454612, "grad_norm": 1.4716527462005615, "learning_rate": 0.00014357785622840606, "loss": 0.4342, "step": 40 }, { "epoch": 0.03721859114015977, "grad_norm": 1.3762019872665405, "learning_rate": 0.00013753343823789445, "loss": 0.4361, "step": 41 }, { "epoch": 0.03812636165577342, "grad_norm": 2.7313358783721924, "learning_rate": 0.00013154594889095854, "loss": 0.4984, "step": 42 }, { "epoch": 0.03903413217138707, "grad_norm": 2.4179422855377197, "learning_rate": 0.00012562779817359865, "loss": 0.4451, "step": 43 }, { "epoch": 0.03994190268700073, "grad_norm": 2.6130852699279785, "learning_rate": 0.00011979125235691685, "loss": 0.5173, "step": 44 }, { "epoch": 0.04084967320261438, "grad_norm": 2.8871238231658936, "learning_rate": 0.00011404840857339706, "loss": 0.4606, "step": 45 }, { "epoch": 0.04175744371822803, "grad_norm": 2.950117826461792, "learning_rate": 0.0001084111697437504, "loss": 0.4586, "step": 46 }, { "epoch": 0.04266521423384168, "grad_norm": 0.984398365020752, "learning_rate": 0.00010289121990629447, "loss": 0.4926, "step": 47 }, { "epoch": 0.04357298474945534, "grad_norm": 2.0302248001098633, "learning_rate": 9.750000000000003e-05, "loss": 0.4309, "step": 48 }, { "epoch": 0.04448075526506899, "grad_norm": 1.1805931329727173, "learning_rate": 9.22486841513973e-05, "loss": 0.4593, "step": 49 }, { "epoch": 0.04538852578068264, "grad_norm": 1.6091700792312622, "learning_rate": 8.714815651449293e-05, "loss": 0.4697, "step": 50 }, { "epoch": 0.04538852578068264, "eval_loss": 0.4174307584762573, "eval_runtime": 0.5516, "eval_samples_per_second": 90.651, "eval_steps_per_second": 5.439, "step": 50 } ], "logging_steps": 1, "max_steps": 71, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.81327834021888e+16, "train_batch_size": 6, "trial_name": null, "trial_params": null }