{ "best_metric": 1.1386715173721313, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 2.0752269779507135, "eval_steps": 25, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08300907911802853, "grad_norm": 3.467146396636963, "learning_rate": 0.0002989354846774545, "loss": 3.6542, "step": 1 }, { "epoch": 0.08300907911802853, "eval_loss": 2.9620537757873535, "eval_runtime": 1.664, "eval_samples_per_second": 30.047, "eval_steps_per_second": 7.812, "step": 1 }, { "epoch": 0.16601815823605706, "grad_norm": 4.602604866027832, "learning_rate": 0.0002957587267523652, "loss": 2.9754, "step": 2 }, { "epoch": 0.2490272373540856, "grad_norm": 9.224260330200195, "learning_rate": 0.00029051982559491393, "loss": 2.2457, "step": 3 }, { "epoch": 0.3320363164721141, "grad_norm": 6.159036636352539, "learning_rate": 0.00028330140180592156, "loss": 2.5341, "step": 4 }, { "epoch": 0.4150453955901427, "grad_norm": 6.676574230194092, "learning_rate": 0.00027421729424061787, "loss": 1.8128, "step": 5 }, { "epoch": 0.4980544747081712, "grad_norm": 3.2664153575897217, "learning_rate": 0.0002634107647018905, "loss": 1.5817, "step": 6 }, { "epoch": 0.5810635538261998, "grad_norm": 2.6292076110839844, "learning_rate": 0.00025105223861607306, "loss": 2.3494, "step": 7 }, { "epoch": 0.6640726329442282, "grad_norm": 2.5120043754577637, "learning_rate": 0.00023733661732216452, "loss": 1.5132, "step": 8 }, { "epoch": 0.7470817120622568, "grad_norm": 4.4901814460754395, "learning_rate": 0.00022248020436128478, "loss": 1.4384, "step": 9 }, { "epoch": 0.8300907911802854, "grad_norm": 1.9909392595291138, "learning_rate": 0.00020671729424061788, "loss": 2.0038, "step": 10 }, { "epoch": 0.9130998702983139, "grad_norm": 3.3288509845733643, "learning_rate": 0.00019029647746907283, "loss": 1.3536, "step": 11 }, { "epoch": 0.9961089494163424, "grad_norm": 2.519279718399048, "learning_rate": 0.0001734767201364573, "loss": 1.0483, "step": 12 }, { "epoch": 1.0791180285343709, "grad_norm": 9.918027877807617, "learning_rate": 0.0001565232798635427, "loss": 3.532, "step": 13 }, { "epoch": 1.1621271076523996, "grad_norm": 2.53695011138916, "learning_rate": 0.00013970352253092714, "loss": 1.2221, "step": 14 }, { "epoch": 1.245136186770428, "grad_norm": 2.4748690128326416, "learning_rate": 0.00012328270575938212, "loss": 1.0417, "step": 15 }, { "epoch": 1.3281452658884565, "grad_norm": 1.6321978569030762, "learning_rate": 0.00010751979563871518, "loss": 1.8484, "step": 16 }, { "epoch": 1.411154345006485, "grad_norm": 1.4795855283737183, "learning_rate": 9.266338267783541e-05, "loss": 1.0031, "step": 17 }, { "epoch": 1.4941634241245136, "grad_norm": 1.9086230993270874, "learning_rate": 7.894776138392688e-05, "loss": 0.9499, "step": 18 }, { "epoch": 1.577172503242542, "grad_norm": 1.3793696165084839, "learning_rate": 6.658923529810946e-05, "loss": 1.7622, "step": 19 }, { "epoch": 1.6601815823605706, "grad_norm": 1.6033817529678345, "learning_rate": 5.578270575938211e-05, "loss": 1.0468, "step": 20 }, { "epoch": 1.7431906614785992, "grad_norm": 2.249070167541504, "learning_rate": 4.669859819407844e-05, "loss": 0.9214, "step": 21 }, { "epoch": 1.8261997405966277, "grad_norm": 1.2679235935211182, "learning_rate": 3.948017440508607e-05, "loss": 1.6549, "step": 22 }, { "epoch": 1.9092088197146562, "grad_norm": 1.4422551393508911, "learning_rate": 3.4241273247634805e-05, "loss": 0.9849, "step": 23 }, { "epoch": 1.9922178988326849, "grad_norm": 1.48310124874115, "learning_rate": 3.10645153225455e-05, "loss": 0.9386, "step": 24 }, { "epoch": 2.0752269779507135, "grad_norm": 3.796576499938965, "learning_rate": 2.9999999999999997e-05, "loss": 3.5664, "step": 25 }, { "epoch": 2.0752269779507135, "eval_loss": 1.1386715173721313, "eval_runtime": 1.6655, "eval_samples_per_second": 30.021, "eval_steps_per_second": 7.806, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 30, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.115492263624704e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }