{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.22870211549456831, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0011435105774728416, "eval_loss": NaN, "eval_runtime": 22.7683, "eval_samples_per_second": 16.207, "eval_steps_per_second": 8.125, "step": 1 }, { "epoch": 0.011435105774728416, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 1.4199, "step": 10 }, { "epoch": 0.022870211549456832, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 28.5375, "step": 20 }, { "epoch": 0.03430531732418525, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 19.8312, "step": 30 }, { "epoch": 0.045740423098913664, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 79.4673, "step": 40 }, { "epoch": 0.05717552887364208, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 18.5405, "step": 50 }, { "epoch": 0.05717552887364208, "eval_loss": NaN, "eval_runtime": 21.0699, "eval_samples_per_second": 17.513, "eval_steps_per_second": 8.78, "step": 50 }, { "epoch": 0.0686106346483705, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 90.8954, "step": 60 }, { "epoch": 0.08004574042309891, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 18.4395, "step": 70 }, { "epoch": 0.09148084619782733, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 9.3505, "step": 80 }, { "epoch": 0.10291595197255575, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 9.4442, "step": 90 }, { "epoch": 0.11435105774728416, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 12.0121, "step": 100 }, { "epoch": 0.11435105774728416, "eval_loss": NaN, "eval_runtime": 21.0675, "eval_samples_per_second": 17.515, "eval_steps_per_second": 8.781, "step": 100 }, { "epoch": 0.12578616352201258, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 9.6699, "step": 110 }, { "epoch": 0.137221269296741, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 14.4873, "step": 120 }, { "epoch": 0.14865637507146942, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 11.8371, "step": 130 }, { "epoch": 0.16009148084619781, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 5.245, "step": 140 }, { "epoch": 0.17152658662092624, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 26.7057, "step": 150 }, { "epoch": 0.17152658662092624, "eval_loss": NaN, "eval_runtime": 21.0628, "eval_samples_per_second": 17.519, "eval_steps_per_second": 8.783, "step": 150 }, { "epoch": 0.18296169239565466, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 16.942, "step": 160 }, { "epoch": 0.19439679817038308, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 8.9252, "step": 170 }, { "epoch": 0.2058319039451115, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 16.2143, "step": 180 }, { "epoch": 0.21726700971983992, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 19.0338, "step": 190 }, { "epoch": 0.22870211549456831, "grad_norm": NaN, "learning_rate": 0.0002, "loss": 17.9537, "step": 200 }, { "epoch": 0.22870211549456831, "eval_loss": NaN, "eval_runtime": 21.0805, "eval_samples_per_second": 17.504, "eval_steps_per_second": 8.776, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.474218216620032e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }