{ "best_metric": 3.3031609058380127, "best_model_checkpoint": "miner_id_24/checkpoint-30", "epoch": 0.000383136869260929, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.2771228975364299e-05, "eval_loss": 3.761971950531006, "eval_runtime": 495.2638, "eval_samples_per_second": 66.569, "eval_steps_per_second": 33.285, "step": 1 }, { "epoch": 3.8313686926092896e-05, "grad_norm": 1.6783641576766968, "learning_rate": 0.00012, "loss": 15.0748, "step": 3 }, { "epoch": 6.38561448768215e-05, "eval_loss": 3.731351852416992, "eval_runtime": 495.902, "eval_samples_per_second": 66.483, "eval_steps_per_second": 33.242, "step": 5 }, { "epoch": 7.662737385218579e-05, "grad_norm": 1.7251548767089844, "learning_rate": 0.0001992114701314478, "loss": 14.8657, "step": 6 }, { "epoch": 0.0001149410607782787, "grad_norm": 2.13313364982605, "learning_rate": 0.00018763066800438636, "loss": 14.9604, "step": 9 }, { "epoch": 0.000127712289753643, "eval_loss": 3.5326249599456787, "eval_runtime": 494.84, "eval_samples_per_second": 66.626, "eval_steps_per_second": 33.314, "step": 10 }, { "epoch": 0.00015325474770437158, "grad_norm": 1.8235368728637695, "learning_rate": 0.000163742398974869, "loss": 14.243, "step": 12 }, { "epoch": 0.0001915684346304645, "grad_norm": 2.1185615062713623, "learning_rate": 0.00013090169943749476, "loss": 13.4274, "step": 15 }, { "epoch": 0.0001915684346304645, "eval_loss": 3.4010062217712402, "eval_runtime": 494.8886, "eval_samples_per_second": 66.619, "eval_steps_per_second": 33.311, "step": 15 }, { "epoch": 0.0002298821215565574, "grad_norm": 2.2047791481018066, "learning_rate": 9.372094804706867e-05, "loss": 13.6264, "step": 18 }, { "epoch": 0.000255424579507286, "eval_loss": 3.337207555770874, "eval_runtime": 495.0413, "eval_samples_per_second": 66.598, "eval_steps_per_second": 33.3, "step": 20 }, { "epoch": 0.0002681958084826503, "grad_norm": 2.2516863346099854, "learning_rate": 5.7422070843492734e-05, "loss": 13.2387, "step": 21 }, { "epoch": 0.00030650949540874317, "grad_norm": 2.9069862365722656, "learning_rate": 2.7103137257858868e-05, "loss": 13.1951, "step": 24 }, { "epoch": 0.00031928072438410746, "eval_loss": 3.3083837032318115, "eval_runtime": 494.7436, "eval_samples_per_second": 66.639, "eval_steps_per_second": 33.32, "step": 25 }, { "epoch": 0.0003448231823348361, "grad_norm": 2.1830813884735107, "learning_rate": 7.022351411174866e-06, "loss": 13.1907, "step": 27 }, { "epoch": 0.000383136869260929, "grad_norm": 2.640789270401001, "learning_rate": 0.0, "loss": 13.069, "step": 30 }, { "epoch": 0.000383136869260929, "eval_loss": 3.3031609058380127, "eval_runtime": 494.7185, "eval_samples_per_second": 66.642, "eval_steps_per_second": 33.322, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 129293195673600.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }