{ "best_metric": 0.8007434606552124, "best_model_checkpoint": "miner_id_24/checkpoint-20", "epoch": 0.011981428785382657, "eval_steps": 5, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005990714392691329, "grad_norm": 0.3232486844062805, "learning_rate": 2e-05, "loss": 1.0991, "step": 1 }, { "epoch": 0.0005990714392691329, "eval_loss": 1.0628771781921387, "eval_runtime": 131.644, "eval_samples_per_second": 5.34, "eval_steps_per_second": 2.674, "step": 1 }, { "epoch": 0.0011981428785382657, "grad_norm": 0.23657472431659698, "learning_rate": 4e-05, "loss": 1.067, "step": 2 }, { "epoch": 0.0017972143178073986, "grad_norm": 0.28239014744758606, "learning_rate": 6e-05, "loss": 1.0769, "step": 3 }, { "epoch": 0.0023962857570765314, "grad_norm": 0.2843663990497589, "learning_rate": 8e-05, "loss": 1.1262, "step": 4 }, { "epoch": 0.0029953571963456643, "grad_norm": 0.2925613224506378, "learning_rate": 0.0001, "loss": 0.9741, "step": 5 }, { "epoch": 0.0029953571963456643, "eval_loss": 1.048013687133789, "eval_runtime": 130.2253, "eval_samples_per_second": 5.398, "eval_steps_per_second": 2.703, "step": 5 }, { "epoch": 0.003594428635614797, "grad_norm": 0.34553882479667664, "learning_rate": 0.00012, "loss": 0.9914, "step": 6 }, { "epoch": 0.00419350007488393, "grad_norm": 0.33732372522354126, "learning_rate": 0.00014, "loss": 1.0054, "step": 7 }, { "epoch": 0.004792571514153063, "grad_norm": 0.41986826062202454, "learning_rate": 0.00016, "loss": 0.9714, "step": 8 }, { "epoch": 0.005391642953422196, "grad_norm": 0.28040024638175964, "learning_rate": 0.00018, "loss": 1.0071, "step": 9 }, { "epoch": 0.0059907143926913285, "grad_norm": 0.3935498297214508, "learning_rate": 0.0002, "loss": 0.9173, "step": 10 }, { "epoch": 0.0059907143926913285, "eval_loss": 0.9063844680786133, "eval_runtime": 129.7311, "eval_samples_per_second": 5.419, "eval_steps_per_second": 2.713, "step": 10 }, { "epoch": 0.006589785831960461, "grad_norm": 0.31022465229034424, "learning_rate": 0.00019781476007338058, "loss": 0.8604, "step": 11 }, { "epoch": 0.007188857271229594, "grad_norm": 0.4046589732170105, "learning_rate": 0.0001913545457642601, "loss": 0.9009, "step": 12 }, { "epoch": 0.007787928710498727, "grad_norm": 0.5031927824020386, "learning_rate": 0.00018090169943749476, "loss": 0.876, "step": 13 }, { "epoch": 0.00838700014976786, "grad_norm": 0.4806121289730072, "learning_rate": 0.00016691306063588583, "loss": 0.9419, "step": 14 }, { "epoch": 0.008986071589036993, "grad_norm": 0.35671281814575195, "learning_rate": 0.00015000000000000001, "loss": 0.8153, "step": 15 }, { "epoch": 0.008986071589036993, "eval_loss": 0.8290239572525024, "eval_runtime": 130.5907, "eval_samples_per_second": 5.383, "eval_steps_per_second": 2.695, "step": 15 }, { "epoch": 0.009585143028306126, "grad_norm": 0.3003746569156647, "learning_rate": 0.00013090169943749476, "loss": 0.8158, "step": 16 }, { "epoch": 0.010184214467575258, "grad_norm": 0.2912845015525818, "learning_rate": 0.00011045284632676536, "loss": 0.804, "step": 17 }, { "epoch": 0.010783285906844391, "grad_norm": 0.31682682037353516, "learning_rate": 8.954715367323468e-05, "loss": 0.8039, "step": 18 }, { "epoch": 0.011382357346113524, "grad_norm": 0.3057119846343994, "learning_rate": 6.909830056250527e-05, "loss": 0.8771, "step": 19 }, { "epoch": 0.011981428785382657, "grad_norm": 0.28653478622436523, "learning_rate": 5.000000000000002e-05, "loss": 0.8927, "step": 20 }, { "epoch": 0.011981428785382657, "eval_loss": 0.8007434606552124, "eval_runtime": 135.9653, "eval_samples_per_second": 5.17, "eval_steps_per_second": 2.589, "step": 20 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6591291693465600.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }