{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.14316392269148176, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007158196134574087, "eval_loss": 0.7471321821212769, "eval_runtime": 35.0025, "eval_samples_per_second": 16.827, "eval_steps_per_second": 8.428, "step": 1 }, { "epoch": 0.0071581961345740875, "grad_norm": 0.3041437268257141, "learning_rate": 0.0002, "loss": 0.6256, "step": 10 }, { "epoch": 0.014316392269148175, "grad_norm": 0.21548660099506378, "learning_rate": 0.0002, "loss": 0.4944, "step": 20 }, { "epoch": 0.021474588403722263, "grad_norm": 0.302517831325531, "learning_rate": 0.0002, "loss": 0.5196, "step": 30 }, { "epoch": 0.02863278453829635, "grad_norm": 0.2508035898208618, "learning_rate": 0.0002, "loss": 0.4403, "step": 40 }, { "epoch": 0.03579098067287044, "grad_norm": 0.20165234804153442, "learning_rate": 0.0002, "loss": 0.4687, "step": 50 }, { "epoch": 0.03579098067287044, "eval_loss": 0.4433478116989136, "eval_runtime": 33.6585, "eval_samples_per_second": 17.499, "eval_steps_per_second": 8.765, "step": 50 }, { "epoch": 0.04294917680744453, "grad_norm": 0.19860732555389404, "learning_rate": 0.0002, "loss": 0.4482, "step": 60 }, { "epoch": 0.05010737294201861, "grad_norm": 0.2684027850627899, "learning_rate": 0.0002, "loss": 0.4554, "step": 70 }, { "epoch": 0.0572655690765927, "grad_norm": 0.2177385836839676, "learning_rate": 0.0002, "loss": 0.432, "step": 80 }, { "epoch": 0.06442376521116679, "grad_norm": 0.2141849547624588, "learning_rate": 0.0002, "loss": 0.4056, "step": 90 }, { "epoch": 0.07158196134574088, "grad_norm": 0.22702062129974365, "learning_rate": 0.0002, "loss": 0.4283, "step": 100 }, { "epoch": 0.07158196134574088, "eval_loss": 0.4245603382587433, "eval_runtime": 33.7143, "eval_samples_per_second": 17.47, "eval_steps_per_second": 8.75, "step": 100 }, { "epoch": 0.07874015748031496, "grad_norm": 0.23127739131450653, "learning_rate": 0.0002, "loss": 0.4524, "step": 110 }, { "epoch": 0.08589835361488905, "grad_norm": 0.231285959482193, "learning_rate": 0.0002, "loss": 0.4068, "step": 120 }, { "epoch": 0.09305654974946313, "grad_norm": 0.22599922120571136, "learning_rate": 0.0002, "loss": 0.4394, "step": 130 }, { "epoch": 0.10021474588403723, "grad_norm": 0.22092868387699127, "learning_rate": 0.0002, "loss": 0.4272, "step": 140 }, { "epoch": 0.1073729420186113, "grad_norm": 0.2594935894012451, "learning_rate": 0.0002, "loss": 0.4254, "step": 150 }, { "epoch": 0.1073729420186113, "eval_loss": 0.4146997332572937, "eval_runtime": 33.7342, "eval_samples_per_second": 17.46, "eval_steps_per_second": 8.745, "step": 150 }, { "epoch": 0.1145311381531854, "grad_norm": 0.25014057755470276, "learning_rate": 0.0002, "loss": 0.4102, "step": 160 }, { "epoch": 0.12168933428775948, "grad_norm": 0.24548886716365814, "learning_rate": 0.0002, "loss": 0.4263, "step": 170 }, { "epoch": 0.12884753042233357, "grad_norm": 0.237931028008461, "learning_rate": 0.0002, "loss": 0.4323, "step": 180 }, { "epoch": 0.13600572655690765, "grad_norm": 0.21747586131095886, "learning_rate": 0.0002, "loss": 0.4161, "step": 190 }, { "epoch": 0.14316392269148176, "grad_norm": 0.24018722772598267, "learning_rate": 0.0002, "loss": 0.3944, "step": 200 }, { "epoch": 0.14316392269148176, "eval_loss": 0.40698060393333435, "eval_runtime": 33.7394, "eval_samples_per_second": 17.457, "eval_steps_per_second": 8.743, "step": 200 } ], "logging_steps": 10, "max_steps": 200, 
"num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.5912916934656e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }