{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.948356807511737, "eval_steps": 500, "global_step": 78, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3755868544600939, "grad_norm": 7.420562088875575, "learning_rate": 5e-06, "loss": 1.1267, "step": 10 }, { "epoch": 0.7511737089201878, "grad_norm": 1.9326575993897155, "learning_rate": 5e-06, "loss": 1.0283, "step": 20 }, { "epoch": 0.9765258215962441, "eval_loss": 0.9831027984619141, "eval_runtime": 29.9012, "eval_samples_per_second": 23.946, "eval_steps_per_second": 0.401, "step": 26 }, { "epoch": 1.136150234741784, "grad_norm": 1.1316893276757012, "learning_rate": 5e-06, "loss": 1.039, "step": 30 }, { "epoch": 1.511737089201878, "grad_norm": 0.9870625157508757, "learning_rate": 5e-06, "loss": 0.9304, "step": 40 }, { "epoch": 1.887323943661972, "grad_norm": 1.1888711767104414, "learning_rate": 5e-06, "loss": 0.914, "step": 50 }, { "epoch": 1.9624413145539905, "eval_loss": 0.9372476935386658, "eval_runtime": 30.023, "eval_samples_per_second": 23.848, "eval_steps_per_second": 0.4, "step": 52 }, { "epoch": 2.272300469483568, "grad_norm": 1.4024339606578018, "learning_rate": 5e-06, "loss": 0.9248, "step": 60 }, { "epoch": 2.647887323943662, "grad_norm": 1.366121091039955, "learning_rate": 5e-06, "loss": 0.8388, "step": 70 }, { "epoch": 2.948356807511737, "eval_loss": 0.9231159687042236, "eval_runtime": 29.1774, "eval_samples_per_second": 24.54, "eval_steps_per_second": 0.411, "step": 78 }, { "epoch": 2.948356807511737, "step": 78, "total_flos": 130443525488640.0, "train_loss": 0.9576788682204026, "train_runtime": 4829.447, "train_samples_per_second": 8.445, "train_steps_per_second": 0.016 } ], "logging_steps": 10, "max_steps": 78, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 130443525488640.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }