{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.444275234513159e-05, "eval_steps": 3, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.444275234513159e-06, "grad_norm": 0.13059046864509583, "learning_rate": 2e-05, "loss": 10.8347, "step": 1 }, { "epoch": 9.444275234513159e-06, "eval_loss": 10.834060668945312, "eval_runtime": 143.1612, "eval_samples_per_second": 311.418, "eval_steps_per_second": 155.713, "step": 1 }, { "epoch": 1.8888550469026318e-05, "grad_norm": 0.13683012127876282, "learning_rate": 4e-05, "loss": 10.8284, "step": 2 }, { "epoch": 2.8332825703539477e-05, "grad_norm": 0.10735035687685013, "learning_rate": 6e-05, "loss": 10.8353, "step": 3 }, { "epoch": 2.8332825703539477e-05, "eval_loss": 10.833964347839355, "eval_runtime": 142.0935, "eval_samples_per_second": 313.758, "eval_steps_per_second": 156.883, "step": 3 }, { "epoch": 3.7777100938052636e-05, "grad_norm": 0.13603197038173676, "learning_rate": 8e-05, "loss": 10.8475, "step": 4 }, { "epoch": 4.7221376172565795e-05, "grad_norm": 0.1254109889268875, "learning_rate": 0.0001, "loss": 10.8279, "step": 5 }, { "epoch": 5.6665651407078953e-05, "grad_norm": 0.13037869334220886, "learning_rate": 0.00012, "loss": 10.8403, "step": 6 }, { "epoch": 5.6665651407078953e-05, "eval_loss": 10.8331298828125, "eval_runtime": 141.3773, "eval_samples_per_second": 315.348, "eval_steps_per_second": 157.677, "step": 6 }, { "epoch": 6.610992664159211e-05, "grad_norm": 0.17597252130508423, "learning_rate": 0.00014, "loss": 10.8405, "step": 7 }, { "epoch": 7.555420187610527e-05, "grad_norm": 0.18065564334392548, "learning_rate": 0.00016, "loss": 10.8292, "step": 8 }, { "epoch": 8.499847711061843e-05, "grad_norm": 0.1311742663383484, "learning_rate": 0.00018, "loss": 10.8247, "step": 9 }, { "epoch": 8.499847711061843e-05, "eval_loss": 10.83173656463623, "eval_runtime": 139.6685, "eval_samples_per_second": 319.206, "eval_steps_per_second": 159.607, "step": 9 }, { "epoch": 9.444275234513159e-05, "grad_norm": 0.15824280679225922, "learning_rate": 0.0002, "loss": 10.8336, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1086891884544.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }