{ "best_metric": 2.5499963760375977, "best_model_checkpoint": "miner_id_24/checkpoint-30", "epoch": 0.0005255919479313577, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.7519731597711922e-05, "eval_loss": 3.9743845462799072, "eval_runtime": 1381.4735, "eval_samples_per_second": 17.397, "eval_steps_per_second": 8.699, "step": 1 }, { "epoch": 5.2559194793135767e-05, "grad_norm": 1.904785394668579, "learning_rate": 0.00012, "loss": 3.8515, "step": 3 }, { "epoch": 8.759865798855962e-05, "eval_loss": 3.40993595123291, "eval_runtime": 1379.2014, "eval_samples_per_second": 17.426, "eval_steps_per_second": 8.713, "step": 5 }, { "epoch": 0.00010511838958627153, "grad_norm": 2.348266839981079, "learning_rate": 0.0001992114701314478, "loss": 3.6623, "step": 6 }, { "epoch": 0.0001576775843794073, "grad_norm": 3.9737961292266846, "learning_rate": 0.00018763066800438636, "loss": 2.7532, "step": 9 }, { "epoch": 0.00017519731597711924, "eval_loss": 2.764575719833374, "eval_runtime": 1380.799, "eval_samples_per_second": 17.406, "eval_steps_per_second": 8.703, "step": 10 }, { "epoch": 0.00021023677917254307, "grad_norm": 3.4274017810821533, "learning_rate": 0.000163742398974869, "loss": 2.8338, "step": 12 }, { "epoch": 0.00026279597396567885, "grad_norm": 2.62567138671875, "learning_rate": 0.00013090169943749476, "loss": 2.7121, "step": 15 }, { "epoch": 0.00026279597396567885, "eval_loss": 2.642612934112549, "eval_runtime": 1381.9033, "eval_samples_per_second": 17.392, "eval_steps_per_second": 8.696, "step": 15 }, { "epoch": 0.0003153551687588146, "grad_norm": 2.2296853065490723, "learning_rate": 9.372094804706867e-05, "loss": 2.6752, "step": 18 }, { "epoch": 0.00035039463195423847, "eval_loss": 2.568502187728882, "eval_runtime": 1380.1266, "eval_samples_per_second": 17.414, "eval_steps_per_second": 8.707, "step": 20 }, { "epoch": 0.0003679143635519504, "grad_norm": 2.19140362739563, "learning_rate": 5.7422070843492734e-05, "loss": 2.7154, "step": 21 }, { "epoch": 0.00042047355834508613, "grad_norm": 2.177623987197876, "learning_rate": 2.7103137257858868e-05, "loss": 2.3971, "step": 24 }, { "epoch": 0.0004379932899427981, "eval_loss": 2.5534322261810303, "eval_runtime": 1380.0307, "eval_samples_per_second": 17.416, "eval_steps_per_second": 8.708, "step": 25 }, { "epoch": 0.00047303275313822195, "grad_norm": 1.7770730257034302, "learning_rate": 7.022351411174866e-06, "loss": 2.617, "step": 27 }, { "epoch": 0.0005255919479313577, "grad_norm": 1.8957409858703613, "learning_rate": 0.0, "loss": 2.5408, "step": 30 }, { "epoch": 0.0005255919479313577, "eval_loss": 2.5499963760375977, "eval_runtime": 1380.394, "eval_samples_per_second": 17.411, "eval_steps_per_second": 8.705, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.112830925340672e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }