{ "best_metric": 1.7998709678649902, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.5698005698005698, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011396011396011397, "grad_norm": 1.2519962787628174, "learning_rate": 2.9999999999999997e-05, "loss": 2.4206, "step": 1 }, { "epoch": 0.011396011396011397, "eval_loss": 3.1777703762054443, "eval_runtime": 2.6753, "eval_samples_per_second": 18.689, "eval_steps_per_second": 2.617, "step": 1 }, { "epoch": 0.022792022792022793, "grad_norm": 1.8308161497116089, "learning_rate": 5.9999999999999995e-05, "loss": 2.5791, "step": 2 }, { "epoch": 0.03418803418803419, "grad_norm": 2.0269980430603027, "learning_rate": 8.999999999999999e-05, "loss": 2.6558, "step": 3 }, { "epoch": 0.045584045584045586, "grad_norm": 2.5735702514648438, "learning_rate": 0.00011999999999999999, "loss": 2.8524, "step": 4 }, { "epoch": 0.05698005698005698, "grad_norm": 2.610076904296875, "learning_rate": 0.00015, "loss": 2.6163, "step": 5 }, { "epoch": 0.06837606837606838, "grad_norm": 2.135373830795288, "learning_rate": 0.00017999999999999998, "loss": 2.5374, "step": 6 }, { "epoch": 0.07977207977207977, "grad_norm": 1.9062364101409912, "learning_rate": 0.00020999999999999998, "loss": 2.6469, "step": 7 }, { "epoch": 0.09116809116809117, "grad_norm": 1.4977805614471436, "learning_rate": 0.00023999999999999998, "loss": 2.2611, "step": 8 }, { "epoch": 0.10256410256410256, "grad_norm": 1.4389641284942627, "learning_rate": 0.00027, "loss": 2.2009, "step": 9 }, { "epoch": 0.11396011396011396, "grad_norm": 1.9085431098937988, "learning_rate": 0.0003, "loss": 2.2492, "step": 10 }, { "epoch": 0.12535612535612536, "grad_norm": 1.7611523866653442, "learning_rate": 0.0002999731384004606, "loss": 2.2406, "step": 11 }, { "epoch": 0.13675213675213677, "grad_norm": 1.9762187004089355, "learning_rate": 0.0002998925632224497, "loss": 2.21, "step": 12 }, { "epoch": 0.14814814814814814, "grad_norm": 1.983385443687439, "learning_rate": 0.00029975830332434265, "loss": 1.9138, "step": 13 }, { "epoch": 0.15954415954415954, "grad_norm": 2.5348525047302246, "learning_rate": 0.00029957040679194776, "loss": 2.0607, "step": 14 }, { "epoch": 0.17094017094017094, "grad_norm": 1.8772743940353394, "learning_rate": 0.00029932894092128383, "loss": 1.7761, "step": 15 }, { "epoch": 0.18233618233618235, "grad_norm": 1.8809691667556763, "learning_rate": 0.0002990339921944777, "loss": 1.7878, "step": 16 }, { "epoch": 0.19373219373219372, "grad_norm": 2.102220296859741, "learning_rate": 0.00029868566624879054, "loss": 2.2667, "step": 17 }, { "epoch": 0.20512820512820512, "grad_norm": 2.032646894454956, "learning_rate": 0.00029828408783878324, "loss": 1.6751, "step": 18 }, { "epoch": 0.21652421652421652, "grad_norm": 2.541229486465454, "learning_rate": 0.00029782940079163485, "loss": 1.3936, "step": 19 }, { "epoch": 0.22792022792022792, "grad_norm": 3.479897975921631, "learning_rate": 0.00029732176795563037, "loss": 1.7468, "step": 20 }, { "epoch": 0.23931623931623933, "grad_norm": 4.591503143310547, "learning_rate": 0.0002967613711418359, "loss": 2.2668, "step": 21 }, { "epoch": 0.25071225071225073, "grad_norm": 7.106602668762207, "learning_rate": 0.000296148411058982, "loss": 2.8446, "step": 22 }, { "epoch": 0.2621082621082621, "grad_norm": 4.072912693023682, "learning_rate": 0.00029548310724157904, "loss": 2.4782, "step": 23 }, { "epoch": 0.27350427350427353, "grad_norm": 3.0776100158691406, "learning_rate": 0.0002947656979712899, "loss": 2.3275, "step": 24 }, { "epoch": 0.2849002849002849, "grad_norm": 1.9854662418365479, "learning_rate": 0.0002939964401915884, "loss": 2.2914, "step": 25 }, { "epoch": 0.2849002849002849, "eval_loss": 2.036309242248535, "eval_runtime": 1.6991, "eval_samples_per_second": 29.427, "eval_steps_per_second": 4.12, "step": 25 }, { "epoch": 0.2962962962962963, "grad_norm": 1.2816338539123535, "learning_rate": 0.0002931756094157332, "loss": 2.0158, "step": 26 }, { "epoch": 0.3076923076923077, "grad_norm": 1.3689543008804321, "learning_rate": 0.0002923034996280924, "loss": 1.9286, "step": 27 }, { "epoch": 0.3190883190883191, "grad_norm": 1.3031697273254395, "learning_rate": 0.0002913804231788509, "loss": 2.0447, "step": 28 }, { "epoch": 0.33048433048433046, "grad_norm": 1.2884080410003662, "learning_rate": 0.00029040671067214087, "loss": 1.7308, "step": 29 }, { "epoch": 0.3418803418803419, "grad_norm": 1.2418413162231445, "learning_rate": 0.0002893827108476348, "loss": 1.7873, "step": 30 }, { "epoch": 0.35327635327635326, "grad_norm": 1.2331762313842773, "learning_rate": 0.000288308790455642, "loss": 1.8249, "step": 31 }, { "epoch": 0.3646723646723647, "grad_norm": 1.3798717260360718, "learning_rate": 0.00028718533412575606, "loss": 1.6925, "step": 32 }, { "epoch": 0.37606837606837606, "grad_norm": 1.2883918285369873, "learning_rate": 0.00028601274422909733, "loss": 1.6585, "step": 33 }, { "epoch": 0.38746438746438744, "grad_norm": 1.4337915182113647, "learning_rate": 0.00028479144073420234, "loss": 1.6603, "step": 34 }, { "epoch": 0.39886039886039887, "grad_norm": 1.4459898471832275, "learning_rate": 0.0002835218610566095, "loss": 1.811, "step": 35 }, { "epoch": 0.41025641025641024, "grad_norm": 1.5042760372161865, "learning_rate": 0.0002822044599021973, "loss": 1.5776, "step": 36 }, { "epoch": 0.42165242165242167, "grad_norm": 1.721915364265442, "learning_rate": 0.0002808397091043291, "loss": 1.5148, "step": 37 }, { "epoch": 0.43304843304843305, "grad_norm": 1.7263678312301636, "learning_rate": 0.00027942809745486343, "loss": 1.6432, "step": 38 }, { "epoch": 0.4444444444444444, "grad_norm": 1.4137566089630127, "learning_rate": 0.0002779701305290915, "loss": 1.1639, "step": 39 }, { "epoch": 0.45584045584045585, "grad_norm": 1.5665677785873413, "learning_rate": 0.00027646633050466265, "loss": 1.2632, "step": 40 }, { "epoch": 0.4672364672364672, "grad_norm": 2.0215096473693848, "learning_rate": 0.0002749172359745641, "loss": 1.3551, "step": 41 }, { "epoch": 0.47863247863247865, "grad_norm": 4.784286975860596, "learning_rate": 0.0002733234017542215, "loss": 2.3304, "step": 42 }, { "epoch": 0.49002849002849, "grad_norm": 4.08688497543335, "learning_rate": 0.0002716853986827888, "loss": 2.402, "step": 43 }, { "epoch": 0.5014245014245015, "grad_norm": 2.5830602645874023, "learning_rate": 0.0002700038134187002, "loss": 2.3759, "step": 44 }, { "epoch": 0.5128205128205128, "grad_norm": 2.1645848751068115, "learning_rate": 0.00026827924822955487, "loss": 2.1305, "step": 45 }, { "epoch": 0.5242165242165242, "grad_norm": 1.5849443674087524, "learning_rate": 0.0002665123207764128, "loss": 1.9809, "step": 46 }, { "epoch": 0.5356125356125356, "grad_norm": 1.2589260339736938, "learning_rate": 0.00026470366389257614, "loss": 1.706, "step": 47 }, { "epoch": 0.5470085470085471, "grad_norm": 1.0531516075134277, "learning_rate": 0.0002628539253569372, "loss": 1.9793, "step": 48 }, { "epoch": 0.5584045584045584, "grad_norm": 1.193375587463379, "learning_rate": 0.00026096376766197307, "loss": 1.9775, "step": 49 }, { "epoch": 0.5698005698005698, "grad_norm": 1.1033531427383423, "learning_rate": 0.00025903386777647154, "loss": 1.8742, "step": 50 }, { "epoch": 0.5698005698005698, "eval_loss": 1.7998709678649902, "eval_runtime": 1.7036, "eval_samples_per_second": 29.349, "eval_steps_per_second": 4.109, "step": 50 } ], "logging_steps": 1, "max_steps": 176, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.2522914467282944e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }