{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.011463250168577209, "eval_steps": 34, "global_step": 34, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00033715441672285906, "eval_loss": 2.3532843589782715, "eval_runtime": 338.8675, "eval_samples_per_second": 14.74, "eval_steps_per_second": 1.844, "step": 1 }, { "epoch": 0.0010114632501685772, "grad_norm": 0.8756303787231445, "learning_rate": 1.5e-05, "loss": 2.361, "step": 3 }, { "epoch": 0.0020229265003371545, "grad_norm": 0.9312941431999207, "learning_rate": 3e-05, "loss": 2.4681, "step": 6 }, { "epoch": 0.0030343897505057315, "grad_norm": 0.9662004709243774, "learning_rate": 4.5e-05, "loss": 2.1782, "step": 9 }, { "epoch": 0.004045853000674309, "grad_norm": 1.3127541542053223, "learning_rate": 4.999675562428437e-05, "loss": 2.2872, "step": 12 }, { "epoch": 0.0050573162508428865, "grad_norm": 1.5349361896514893, "learning_rate": 4.9979724954289244e-05, "loss": 1.8232, "step": 15 }, { "epoch": 0.006068779501011463, "grad_norm": 1.5614502429962158, "learning_rate": 4.994810682835951e-05, "loss": 1.5184, "step": 18 }, { "epoch": 0.0070802427511800405, "grad_norm": 1.413482904434204, "learning_rate": 4.990191971059033e-05, "loss": 1.2261, "step": 21 }, { "epoch": 0.008091706001348618, "grad_norm": 1.2501227855682373, "learning_rate": 4.984119057295783e-05, "loss": 1.1656, "step": 24 }, { "epoch": 0.009103169251517195, "grad_norm": 1.0704395771026611, "learning_rate": 4.976595487956823e-05, "loss": 1.0296, "step": 27 }, { "epoch": 0.010114632501685773, "grad_norm": 0.8690694570541382, "learning_rate": 4.967625656594782e-05, "loss": 0.834, "step": 30 }, { "epoch": 0.011126095751854349, "grad_norm": 0.9992819428443909, "learning_rate": 4.957214801338581e-05, "loss": 0.9255, "step": 33 }, { "epoch": 0.011463250168577209, "eval_loss": 0.8507078289985657, "eval_runtime": 341.3748, "eval_samples_per_second": 14.632, "eval_steps_per_second": 1.831, "step": 34 } ], "logging_steps": 3, "max_steps": 400, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 34, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.753463114622566e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }