{ "best_metric": 0.7479696273803711, "best_model_checkpoint": "miner_id_24/checkpoint-20", "epoch": 0.0477326968973747, "eval_steps": 5, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002386634844868735, "grad_norm": 0.8287795186042786, "learning_rate": 2e-05, "loss": 0.8038, "step": 1 }, { "epoch": 0.002386634844868735, "eval_loss": 0.8742901682853699, "eval_runtime": 88.9007, "eval_samples_per_second": 1.991, "eval_steps_per_second": 1.001, "step": 1 }, { "epoch": 0.00477326968973747, "grad_norm": 0.2608904540538788, "learning_rate": 4e-05, "loss": 0.7002, "step": 2 }, { "epoch": 0.007159904534606206, "grad_norm": 0.3414213955402374, "learning_rate": 6e-05, "loss": 0.7874, "step": 3 }, { "epoch": 0.00954653937947494, "grad_norm": 0.3702467978000641, "learning_rate": 8e-05, "loss": 0.6293, "step": 4 }, { "epoch": 0.011933174224343675, "grad_norm": 0.3586605191230774, "learning_rate": 0.0001, "loss": 0.6539, "step": 5 }, { "epoch": 0.011933174224343675, "eval_loss": 0.8658517003059387, "eval_runtime": 88.9025, "eval_samples_per_second": 1.991, "eval_steps_per_second": 1.001, "step": 5 }, { "epoch": 0.014319809069212411, "grad_norm": 0.42771315574645996, "learning_rate": 0.00012, "loss": 0.7592, "step": 6 }, { "epoch": 0.016706443914081145, "grad_norm": 0.5302035808563232, "learning_rate": 0.00014, "loss": 0.6993, "step": 7 }, { "epoch": 0.01909307875894988, "grad_norm": 0.418961763381958, "learning_rate": 0.00016, "loss": 0.9279, "step": 8 }, { "epoch": 0.021479713603818614, "grad_norm": 0.32545751333236694, "learning_rate": 0.00018, "loss": 0.5822, "step": 9 }, { "epoch": 0.02386634844868735, "grad_norm": 0.3017027974128723, "learning_rate": 0.0002, "loss": 0.6542, "step": 10 }, { "epoch": 0.02386634844868735, "eval_loss": 0.7966330051422119, "eval_runtime": 88.9654, "eval_samples_per_second": 1.99, "eval_steps_per_second": 1.0, "step": 10 }, { "epoch": 0.026252983293556086, "grad_norm": 1.3630088567733765, "learning_rate": 0.0001999979446958366, "loss": 0.8378, "step": 11 }, { "epoch": 0.028639618138424822, "grad_norm": 0.4510791301727295, "learning_rate": 0.00019999177886783194, "loss": 0.8985, "step": 12 }, { "epoch": 0.031026252983293555, "grad_norm": 0.5864933729171753, "learning_rate": 0.00019998150276943902, "loss": 0.6781, "step": 13 }, { "epoch": 0.03341288782816229, "grad_norm": 0.423950731754303, "learning_rate": 0.000199967116823068, "loss": 0.5872, "step": 14 }, { "epoch": 0.03579952267303103, "grad_norm": 0.5833825469017029, "learning_rate": 0.0001999486216200688, "loss": 0.7589, "step": 15 }, { "epoch": 0.03579952267303103, "eval_loss": 0.7643417119979858, "eval_runtime": 88.8878, "eval_samples_per_second": 1.991, "eval_steps_per_second": 1.001, "step": 15 }, { "epoch": 0.03818615751789976, "grad_norm": 0.368539035320282, "learning_rate": 0.00019992601792070679, "loss": 0.684, "step": 16 }, { "epoch": 0.0405727923627685, "grad_norm": 0.4044187664985657, "learning_rate": 0.00019989930665413147, "loss": 0.7617, "step": 17 }, { "epoch": 0.04295942720763723, "grad_norm": 0.590376079082489, "learning_rate": 0.00019986848891833845, "loss": 0.7967, "step": 18 }, { "epoch": 0.045346062052505964, "grad_norm": 0.3635483384132385, "learning_rate": 0.0001998335659801241, "loss": 0.5567, "step": 19 }, { "epoch": 0.0477326968973747, "grad_norm": 0.5464037656784058, "learning_rate": 0.00019979453927503364, "loss": 0.6186, "step": 20 }, { "epoch": 0.0477326968973747, "eval_loss": 0.7479696273803711, "eval_runtime": 88.8942, "eval_samples_per_second": 1.991, "eval_steps_per_second": 1.001, "step": 20 } ], "logging_steps": 1, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.984041808658432e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }