{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.007974481658692184, "eval_steps": 10, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001594896331738437, "eval_loss": 1.199280023574829, "eval_runtime": 225.5899, "eval_samples_per_second": 11.703, "eval_steps_per_second": 5.851, "step": 1 }, { "epoch": 0.0007974481658692185, "grad_norm": 0.43941763043403625, "learning_rate": 5e-05, "loss": 1.2094, "step": 5 }, { "epoch": 0.001594896331738437, "grad_norm": 0.3336985409259796, "learning_rate": 0.0001, "loss": 1.1356, "step": 10 }, { "epoch": 0.001594896331738437, "eval_loss": 1.15956711769104, "eval_runtime": 232.8327, "eval_samples_per_second": 11.339, "eval_steps_per_second": 5.669, "step": 10 }, { "epoch": 0.0023923444976076554, "grad_norm": 0.4425758719444275, "learning_rate": 9.619397662556435e-05, "loss": 1.1819, "step": 15 }, { "epoch": 0.003189792663476874, "grad_norm": 0.593981921672821, "learning_rate": 8.535533905932738e-05, "loss": 1.0736, "step": 20 }, { "epoch": 0.003189792663476874, "eval_loss": 1.065757155418396, "eval_runtime": 232.901, "eval_samples_per_second": 11.335, "eval_steps_per_second": 5.668, "step": 20 }, { "epoch": 0.003987240829346092, "grad_norm": 0.3362281918525696, "learning_rate": 6.91341716182545e-05, "loss": 1.115, "step": 25 }, { "epoch": 0.004784688995215311, "grad_norm": 0.4011121690273285, "learning_rate": 5e-05, "loss": 1.0856, "step": 30 }, { "epoch": 0.004784688995215311, "eval_loss": 1.0489628314971924, "eval_runtime": 232.9203, "eval_samples_per_second": 11.334, "eval_steps_per_second": 5.667, "step": 30 }, { "epoch": 0.005582137161084529, "grad_norm": 0.4398060441017151, "learning_rate": 3.086582838174551e-05, "loss": 1.0603, "step": 35 }, { "epoch": 0.006379585326953748, "grad_norm": 0.5486422777175903, "learning_rate": 1.4644660940672627e-05, "loss": 1.0651, "step": 40 }, { "epoch": 0.006379585326953748, "eval_loss": 1.0430271625518799, "eval_runtime": 232.8498, "eval_samples_per_second": 11.338, "eval_steps_per_second": 5.669, "step": 40 }, { "epoch": 0.007177033492822967, "grad_norm": 0.3932763338088989, "learning_rate": 3.8060233744356633e-06, "loss": 1.0669, "step": 45 }, { "epoch": 0.007974481658692184, "grad_norm": 0.3477610945701599, "learning_rate": 0.0, "loss": 0.9524, "step": 50 }, { "epoch": 0.007974481658692184, "eval_loss": 1.0419983863830566, "eval_runtime": 232.7443, "eval_samples_per_second": 11.343, "eval_steps_per_second": 5.671, "step": 50 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9039468904316928.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }