{ "best_metric": 0.8281893730163574, "best_model_checkpoint": "miner_id_24/checkpoint-60", "epoch": 0.1431980906921241, "eval_steps": 5, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002386634844868735, "grad_norm": 0.7556721568107605, "learning_rate": 2e-05, "loss": 1.0664, "step": 1 }, { "epoch": 0.002386634844868735, "eval_loss": 1.0069398880004883, "eval_runtime": 26.3559, "eval_samples_per_second": 6.716, "eval_steps_per_second": 3.377, "step": 1 }, { "epoch": 0.00477326968973747, "grad_norm": 0.5291942358016968, "learning_rate": 4e-05, "loss": 0.7999, "step": 2 }, { "epoch": 0.007159904534606206, "grad_norm": 0.43523454666137695, "learning_rate": 6e-05, "loss": 0.8292, "step": 3 }, { "epoch": 0.00954653937947494, "grad_norm": 0.3985866606235504, "learning_rate": 8e-05, "loss": 0.7817, "step": 4 }, { "epoch": 0.011933174224343675, "grad_norm": 0.4628720283508301, "learning_rate": 0.0001, "loss": 0.7462, "step": 5 }, { "epoch": 0.011933174224343675, "eval_loss": 0.9824773073196411, "eval_runtime": 26.258, "eval_samples_per_second": 6.741, "eval_steps_per_second": 3.389, "step": 5 }, { "epoch": 0.014319809069212411, "grad_norm": 0.769433319568634, "learning_rate": 0.00012, "loss": 0.8973, "step": 6 }, { "epoch": 0.016706443914081145, "grad_norm": 0.3967779874801636, "learning_rate": 0.00014, "loss": 0.7884, "step": 7 }, { "epoch": 0.01909307875894988, "grad_norm": 0.6206874251365662, "learning_rate": 0.00016, "loss": 1.1007, "step": 8 }, { "epoch": 0.021479713603818614, "grad_norm": 0.3204325735569, "learning_rate": 0.00018, "loss": 0.6847, "step": 9 }, { "epoch": 0.02386634844868735, "grad_norm": 0.36889952421188354, "learning_rate": 0.0002, "loss": 0.7953, "step": 10 }, { "epoch": 0.02386634844868735, "eval_loss": 0.9038922190666199, "eval_runtime": 26.2871, "eval_samples_per_second": 6.733, "eval_steps_per_second": 3.386, "step": 10 }, { "epoch": 0.026252983293556086, "grad_norm": 0.4982057809829712, "learning_rate": 0.0001999979446958366, "loss": 1.1466, "step": 11 }, { "epoch": 0.028639618138424822, "grad_norm": 0.5518380999565125, "learning_rate": 0.00019999177886783194, "loss": 1.0339, "step": 12 }, { "epoch": 0.031026252983293555, "grad_norm": 0.43208760023117065, "learning_rate": 0.00019998150276943902, "loss": 0.7813, "step": 13 }, { "epoch": 0.03341288782816229, "grad_norm": 0.394111692905426, "learning_rate": 0.000199967116823068, "loss": 0.6632, "step": 14 }, { "epoch": 0.03579952267303103, "grad_norm": 0.40350112318992615, "learning_rate": 0.0001999486216200688, "loss": 1.0157, "step": 15 }, { "epoch": 0.03579952267303103, "eval_loss": 0.8692727088928223, "eval_runtime": 26.2786, "eval_samples_per_second": 6.736, "eval_steps_per_second": 3.387, "step": 15 }, { "epoch": 0.03818615751789976, "grad_norm": 0.3436519205570221, "learning_rate": 0.00019992601792070679, "loss": 0.7896, "step": 16 }, { "epoch": 0.0405727923627685, "grad_norm": 0.40182027220726013, "learning_rate": 0.00019989930665413147, "loss": 0.8483, "step": 17 }, { "epoch": 0.04295942720763723, "grad_norm": 0.4158000946044922, "learning_rate": 0.00019986848891833845, "loss": 0.8806, "step": 18 }, { "epoch": 0.045346062052505964, "grad_norm": 0.3862776458263397, "learning_rate": 0.0001998335659801241, "loss": 0.6556, "step": 19 }, { "epoch": 0.0477326968973747, "grad_norm": 0.4110924303531647, "learning_rate": 0.00019979453927503364, "loss": 0.8336, "step": 20 }, { "epoch": 0.0477326968973747, "eval_loss": 0.8611570596694946, "eval_runtime": 26.2716, "eval_samples_per_second": 6.737, "eval_steps_per_second": 3.388, "step": 20 }, { "epoch": 0.050119331742243436, "grad_norm": 0.3684820830821991, "learning_rate": 0.00019975141040730207, "loss": 0.849, "step": 21 }, { "epoch": 0.05250596658711217, "grad_norm": 0.37298089265823364, "learning_rate": 0.0001997041811497882, "loss": 0.6263, "step": 22 }, { "epoch": 0.05489260143198091, "grad_norm": 0.3827399015426636, "learning_rate": 0.00019965285344390184, "loss": 0.8503, "step": 23 }, { "epoch": 0.057279236276849645, "grad_norm": 0.4026002287864685, "learning_rate": 0.00019959742939952392, "loss": 0.7683, "step": 24 }, { "epoch": 0.059665871121718374, "grad_norm": 0.3691791296005249, "learning_rate": 0.00019953791129491983, "loss": 0.7776, "step": 25 }, { "epoch": 0.059665871121718374, "eval_loss": 0.8492560386657715, "eval_runtime": 26.2621, "eval_samples_per_second": 6.74, "eval_steps_per_second": 3.389, "step": 25 }, { "epoch": 0.06205250596658711, "grad_norm": 0.43015947937965393, "learning_rate": 0.00019947430157664576, "loss": 0.92, "step": 26 }, { "epoch": 0.06443914081145585, "grad_norm": 0.27037495374679565, "learning_rate": 0.00019940660285944803, "loss": 1.0085, "step": 27 }, { "epoch": 0.06682577565632458, "grad_norm": 0.26421141624450684, "learning_rate": 0.00019933481792615583, "loss": 0.679, "step": 28 }, { "epoch": 0.06921241050119331, "grad_norm": 0.40093734860420227, "learning_rate": 0.0001992589497275665, "loss": 0.8932, "step": 29 }, { "epoch": 0.07159904534606205, "grad_norm": 0.39930209517478943, "learning_rate": 0.0001991790013823246, "loss": 1.0749, "step": 30 }, { "epoch": 0.07159904534606205, "eval_loss": 0.8430463075637817, "eval_runtime": 26.2787, "eval_samples_per_second": 6.735, "eval_steps_per_second": 3.387, "step": 30 }, { "epoch": 0.07398568019093078, "grad_norm": 0.37395668029785156, "learning_rate": 0.00019909497617679348, "loss": 0.6267, "step": 31 }, { "epoch": 0.07637231503579953, "grad_norm": 0.29155558347702026, "learning_rate": 0.0001990068775649202, "loss": 0.4697, "step": 32 }, { "epoch": 0.07875894988066826, "grad_norm": 0.32951420545578003, "learning_rate": 0.00019891470916809362, "loss": 0.8564, "step": 33 }, { "epoch": 0.081145584725537, "grad_norm": 0.3665896952152252, "learning_rate": 0.00019881847477499557, "loss": 0.9376, "step": 34 }, { "epoch": 0.08353221957040573, "grad_norm": 0.3489833176136017, "learning_rate": 0.00019871817834144504, "loss": 0.6592, "step": 35 }, { "epoch": 0.08353221957040573, "eval_loss": 0.8413642644882202, "eval_runtime": 26.2984, "eval_samples_per_second": 6.73, "eval_steps_per_second": 3.384, "step": 35 }, { "epoch": 0.08591885441527446, "grad_norm": 0.335068941116333, "learning_rate": 0.0001986138239902355, "loss": 0.7706, "step": 36 }, { "epoch": 0.0883054892601432, "grad_norm": 0.4248191714286804, "learning_rate": 0.0001985054160109657, "loss": 1.1256, "step": 37 }, { "epoch": 0.09069212410501193, "grad_norm": 0.5729311108589172, "learning_rate": 0.00019839295885986296, "loss": 1.1197, "step": 38 }, { "epoch": 0.09307875894988067, "grad_norm": 0.37213680148124695, "learning_rate": 0.0001982764571596004, "loss": 0.8501, "step": 39 }, { "epoch": 0.0954653937947494, "grad_norm": 0.3951665163040161, "learning_rate": 0.00019815591569910654, "loss": 0.829, "step": 40 }, { "epoch": 0.0954653937947494, "eval_loss": 0.8374943137168884, "eval_runtime": 26.3244, "eval_samples_per_second": 6.724, "eval_steps_per_second": 3.381, "step": 40 }, { "epoch": 0.09785202863961814, "grad_norm": 0.385197252035141, "learning_rate": 0.00019803133943336874, "loss": 1.0619, "step": 41 }, { "epoch": 0.10023866348448687, "grad_norm": 0.3001837730407715, "learning_rate": 0.0001979027334832293, "loss": 0.7597, "step": 42 }, { "epoch": 0.1026252983293556, "grad_norm": 0.3275567293167114, "learning_rate": 0.00019777010313517518, "loss": 0.8198, "step": 43 }, { "epoch": 0.10501193317422435, "grad_norm": 0.3436608910560608, "learning_rate": 0.00019763345384112043, "loss": 0.7347, "step": 44 }, { "epoch": 0.10739856801909307, "grad_norm": 0.32263168692588806, "learning_rate": 0.00019749279121818235, "loss": 0.7859, "step": 45 }, { "epoch": 0.10739856801909307, "eval_loss": 0.8337910771369934, "eval_runtime": 26.3523, "eval_samples_per_second": 6.717, "eval_steps_per_second": 3.377, "step": 45 }, { "epoch": 0.10978520286396182, "grad_norm": 0.36578473448753357, "learning_rate": 0.00019734812104845047, "loss": 1.0011, "step": 46 }, { "epoch": 0.11217183770883055, "grad_norm": 0.29278111457824707, "learning_rate": 0.00019719944927874881, "loss": 0.5549, "step": 47 }, { "epoch": 0.11455847255369929, "grad_norm": 0.3916202783584595, "learning_rate": 0.0001970467820203915, "loss": 0.789, "step": 48 }, { "epoch": 0.11694510739856802, "grad_norm": 0.37124040722846985, "learning_rate": 0.00019689012554893154, "loss": 0.7751, "step": 49 }, { "epoch": 0.11933174224343675, "grad_norm": 0.33767032623291016, "learning_rate": 0.00019672948630390294, "loss": 0.6493, "step": 50 }, { "epoch": 0.11933174224343675, "eval_loss": 0.8324581384658813, "eval_runtime": 26.3253, "eval_samples_per_second": 6.724, "eval_steps_per_second": 3.381, "step": 50 }, { "epoch": 0.12171837708830549, "grad_norm": 0.35318946838378906, "learning_rate": 0.00019656487088855592, "loss": 1.033, "step": 51 }, { "epoch": 0.12410501193317422, "grad_norm": 0.30267930030822754, "learning_rate": 0.00019639628606958533, "loss": 0.6305, "step": 52 }, { "epoch": 0.12649164677804295, "grad_norm": 0.30554893612861633, "learning_rate": 0.0001962237387768529, "loss": 0.7148, "step": 53 }, { "epoch": 0.1288782816229117, "grad_norm": 0.3179020583629608, "learning_rate": 0.00019604723610310194, "loss": 0.6932, "step": 54 }, { "epoch": 0.13126491646778043, "grad_norm": 0.43141505122184753, "learning_rate": 0.00019586678530366606, "loss": 0.9695, "step": 55 }, { "epoch": 0.13126491646778043, "eval_loss": 0.8304810523986816, "eval_runtime": 26.2933, "eval_samples_per_second": 6.732, "eval_steps_per_second": 3.385, "step": 55 }, { "epoch": 0.13365155131264916, "grad_norm": 0.37711283564567566, "learning_rate": 0.00019568239379617088, "loss": 1.0899, "step": 56 }, { "epoch": 0.1360381861575179, "grad_norm": 0.3021875023841858, "learning_rate": 0.00019549406916022905, "loss": 0.5727, "step": 57 }, { "epoch": 0.13842482100238662, "grad_norm": 0.4609815180301666, "learning_rate": 0.00019530181913712872, "loss": 0.8931, "step": 58 }, { "epoch": 0.14081145584725538, "grad_norm": 0.410404235124588, "learning_rate": 0.00019510565162951537, "loss": 0.9462, "step": 59 }, { "epoch": 0.1431980906921241, "grad_norm": 0.3426823318004608, "learning_rate": 0.00019490557470106686, "loss": 0.767, "step": 60 }, { "epoch": 0.1431980906921241, "eval_loss": 0.8281893730163574, "eval_runtime": 26.309, "eval_samples_per_second": 6.728, "eval_steps_per_second": 3.383, "step": 60 } ], "logging_steps": 1, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.174564302225408e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }